#include "kmp_error.h"
#include "kmp_stats.h"

#include "ompt-internal.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();

        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
    }

    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));

        __kmp_internal_end_thread( -1 );
    }
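/* The next few entry points are thin queries: __kmpc_global_thread_num returns
   the caller's global thread id (gtid), __kmpc_global_num_threads the current
   number of threads in the runtime, and the __kmpc_bound_* variants the
   caller's id and team size within its own team. */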
    kmp_int32 gtid = __kmp_entry_gtid();

    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth ) );

    return TCR_4(__kmp_all_nth);

    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );

    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );

    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
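/* __kmpc_ok_to_fork (debug builds): parse the ";file;routine;line" location
   string in loc->psource and compare it against the KMP_PAR_RANGE filter so
   that only the selected parallel regions are actually forked. */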
    if (__kmp_par_range == 0) {
        return TRUE;
    }

    semi2 = strchr(semi2, ';');

    semi2 = strchr(semi2 + 1, ';');

    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
            name--;
        }
        if ((*name == '/') || (*name == ';')) {
            name++;
        }
        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;
        }
    }
    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
            && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;
        }
    }
    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;
        }
        return __kmp_par_range < 0;
    }
    return __kmp_entry_thread() -> th.th_root -> r.r_active;

    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
      global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );

void
__kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
}

void
__kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
      global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}
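/* __kmpc_fork_call is what the compiler emits for #pragma omp parallel: it
   packages the outlined microtask and its shared arguments and hands them to
   __kmp_fork_call / __kmp_join_call.  As a rough sketch (not verbatim compiler
   output), a region with two shared variables lowers to something like:

       __kmpc_fork_call( &loc, 2, (kmpc_micro)outlined_fn, &a, &b );
*/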
    int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)

    va_start( ap, microtask );

    ompt_frame_t* ompt_frame;

    kmp_info_t *master_th = __kmp_threads[ gtid ];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;

    ompt_frame = &(lwt->ompt_task_info.frame);

    int tid = __kmp_tid_from_gtid( gtid );
    ompt_frame = &(parent_team->t.t_implicit_task_taskdata[tid].
        ompt_task_info.frame);

    ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);

#if INCLUDE_SSC_MARKS

    __kmp_fork_call( loc, gtid, fork_context_intel,
            VOLATILE_CAST(void *)      microtask,
            VOLATILE_CAST(microtask_t) microtask,
            VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

#if INCLUDE_SSC_MARKS

    __kmp_join_call( loc, gtid
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
      global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];

    va_start( ap, microtask );

    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

    kmp_team_t *parent_team = this_thr->th.th_team;
    int tid = __kmp_tid_from_gtid( gtid );

    parent_team->t.t_implicit_task_taskdata[tid].
        ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);

    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );
    }
    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
            VOLATILE_CAST(void *)      microtask,
            VOLATILE_CAST(microtask_t) __kmp_teams_master,
            VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

    __kmp_join_call( loc, gtid

    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;

int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}
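/* __kmpc_serialized_parallel / __kmpc_end_serialized_parallel bracket a
   parallel region that ends up running on the encountering thread only (for
   example an if(0) clause, or nested parallelism that is disabled); the "end"
   side below pops the serialized team state back off the thread. */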
    __kmp_serialized_parallel(loc, global_tid);

    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    kmp_task_team_t * task_team = this_thr->th.th_task_team;

    // we need to wait for the proxy tasks before finishing the thread
    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) );

    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT(       serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    /* If necessary, pop the internal control stack values and replace the team values */
    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;
    }

    serial_team -> t.t_level--;

    /* pop the dispatch buffer */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    {
        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer =
            serial_team->t.t_dispatch->th_disp_buffer->next;
        __kmp_free( disp_buffer );
    }

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
        }
#endif

        this_thr -> th.th_team           = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        this_thr -> th.th_team_nproc      = serial_team -> t.t_parent -> t.t_nproc;
        this_thr -> th.th_team_master     = serial_team -> t.t_parent -> t.t_threads[0];
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
        }
    } else {
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                            global_tid, serial_team, serial_team -> t.t_serialized ) );
        }
    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
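/* __kmpc_flush implements #pragma omp flush: a full memory barrier, selected
   per architecture and compiler below (for example an SSE2 fence on x86, or
   __sync_synchronize() as the generic fallback). */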
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

#if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )

    if ( ! __kmp_cpuinfo.initialized ) {
        __kmp_query_cpuid( & __kmp_cpuinfo );
    }
    if ( ! __kmp_cpuinfo.sse2 ) {

#elif KMP_COMPILER_MSVC

    __sync_synchronize();

#endif // KMP_COMPILER_ICC

#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)

#error Unknown or unsupported architecture

    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        KMP_WARNING( ConstructIdentInvalid );

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_frame_t * ompt_frame;

    ompt_frame = __ompt_get_task_frame_internal(0);
    if ( ompt_frame->reenter_runtime_frame == NULL )
        ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
#endif

    __kmp_threads[ global_tid ]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_frame->reenter_runtime_frame = NULL;
#endif
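/* __kmpc_master / __kmpc_end_master: the compiler emits this call pair around
   a #pragma omp master block; __kmpc_master returns 1 only on the master
   thread, and only that thread later calls __kmpc_end_master. */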
    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid )) {
        KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
        kmp_info_t *this_thr = __kmp_threads[ global_tid ];
        kmp_team_t *team     = this_thr -> th.th_team;

        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_begin)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
        __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );

        __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
        __kmp_push_sync( global_tid, ct_master, loc, NULL );

        __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
    }

    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
    KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team     = this_thr -> th.th_team;
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
        KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
    }
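/* __kmpc_ordered / __kmpc_end_ordered serialize the body of an ordered region
   inside a workshared loop; the schedule-specific th_deo_fcn / th_dxo_fcn
   dispatch hooks do the actual enter/exit work when present. */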
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    __kmp_itt_ordered_prep( gtid );

    th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    th->th.ompt_thread_info.wait_id = (uint64_t) loc;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif

    __kmp_itt_ordered_start( gtid );

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

    __kmp_itt_ordered_end( gtid );

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif
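/* With KMP_USE_DYNAMIC_LOCK, a named critical section stores either a direct
   lock tag or a pointer to an indirect lock in the kmp_critical_name word;
   __kmp_init_indirect_csptr below races to install that pointer exactly once,
   and the KMP_*_TAS/FUTEX_LOCK macros are the inlined lock fast paths. */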
#if KMP_USE_DYNAMIC_LOCK

// Initializes the critical-section pointer as an indirect lock.
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
    kmp_indirect_lock_t **lck;
    lck = (kmp_indirect_lock_t **)crit;
    kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
    KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
    KMP_SET_I_LOCK_LOCATION(ilk, loc);
    KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
    KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));

    __kmp_itt_critical_creating(ilk->lock, loc);

    int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
    if (status == 0) {
        __kmp_itt_critical_destroyed(ilk->lock);
    }
    KMP_DEBUG_ASSERT(*lck != NULL);
}
// Fast-path acquire of a test-and-set lock.
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    if (l->lk.poll != KMP_LOCK_FREE(tas) || \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
        KMP_FSYNC_PREPARE(l); \
        KMP_INIT_YIELD(spins); \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
            KMP_YIELD_SPIN(spins); \
        } \
        kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        while (l->lk.poll != KMP_LOCK_FREE(tas) || \
            ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
            __kmp_spin_backoff(&backoff); \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
                KMP_YIELD_SPIN(spins); \
            } \
        } \
    } \
    KMP_FSYNC_ACQUIRED(l); \
}

// Fast-path test of a test-and-set lock.
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
}

// Fast-path release of a test-and-set lock.
#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
}

# include <sys/syscall.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif

// Fast-path acquire of a futex lock.
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid+1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
            continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
            continue; \
        } \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
}

// Fast-path test of a futex lock.
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1 << 1, futex))) { \
        KMP_FSYNC_ACQUIRED(ftx); \
    } \
}

// Fast-path release of a futex lock.
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
}

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        void * idx;

        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );

        __kmp_itt_critical_creating( lck );

        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

        if ( status == 0 ) {
            // Another thread beat us to it; deallocate our lock and use theirs.
            __kmp_itt_critical_destroyed( lck );

            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK
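/* __kmpc_critical / __kmpc_end_critical guard a named #pragma omp critical
   block.  The kmp_critical_name storage either holds a small lock inline
   (TAS / futex) or a pointer to a heavier user lock allocated on first use. */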
#if KMP_USE_DYNAMIC_LOCK
    __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
    KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait);
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
    else { // ticket, queuing or drdpa
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    __kmp_itt_critical_acquiring( lck );

    __kmp_acquire_user_lock_with_checks( lck, global_tid );

    __kmp_itt_critical_acquired( lck );

    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
#endif // KMP_USE_DYNAMIC_LOCK

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation.
static __forceinline kmp_dyna_lockseq_t
__kmp_map_hint_to_lock(uintptr_t hint)
{
#if KMP_USE_TSX
# define KMP_TSX_LOCK(seq) lockseq_##seq
#else
# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
# define KMP_CPUINFO_RTM 0
#endif

    // Hints that do not require further logic
    if (hint & kmp_lock_hint_hle)
        return KMP_TSX_LOCK(hle);
    if (hint & kmp_lock_hint_rtm)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
    if (hint & kmp_lock_hint_adaptive)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;

    // Rule out conflicting hints first by returning the default lock
    if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
        return __kmp_user_lock_seq;
    if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
        return __kmp_user_lock_seq;

    // Do not even consider speculation when it appears to be contended
    if (hint & omp_lock_hint_contended)
        return lockseq_queuing;

    // Uncontended lock without speculation
    if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
        return lockseq_tas;

    // HLE lock for speculation
    if (hint & omp_lock_hint_speculative)
        return KMP_TSX_LOCK(hle);

    return __kmp_user_lock_seq;
}
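/* __kmpc_critical_with_hint is the OpenMP 4.5 variant: the hint from the
   critical(hint(...)) clause picks the lock flavor via __kmp_map_hint_to_lock,
   and the chosen direct or indirect lock is installed in the same
   kmp_critical_name word used by plain __kmpc_critical. */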
void
__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
        KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
    } else {
        __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }

    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        lck = (kmp_user_lock_p)lk;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }

        __kmp_itt_critical_acquiring(lck);

# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);

        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }

        __kmp_itt_critical_acquiring(lck);

        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

    __kmp_itt_critical_acquired( lck );

    KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }

        __kmp_itt_critical_releasing( lck );

# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_RELEASE_TAS_LOCK(lck, global_tid);
        } else
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);

        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }

        __kmp_itt_critical_releasing( lck );

        KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
    else { // ticket, queuing or drdpa
        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
    }

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

    __kmp_itt_critical_releasing( lck );

    __kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
            (uint64_t) lck);
    }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
    KMP_POP_PARTITIONED_TIMER();
    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check )
        __kmp_check_barrier( global_tid, ct_barrier, loc );

    __kmp_threads[global_tid]->th.th_ident = loc;

    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

    return (status != 0) ? 0 : 1;

    KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));

    __kmp_end_split_barrier ( bs_plain_barrier, global_tid );

    KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        KMP_WARNING( ConstructIdentInvalid );

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    if ( __kmp_env_consistency_check ) {
        if ( global_tid < 0 ) {
            KMP_WARNING( ThreadIdentInvalid );
        }

        __kmp_pop_sync( global_tid, ct_master, loc );
    }
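/* __kmpc_single returns TRUE on the one thread that should execute the single
   block; every thread that got FALSE skips the block, and only the executing
   thread calls __kmpc_end_single. */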
    kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );

    if (rc) {
        // We are going to execute the single statement, so we should count it.
        KMP_PUSH_PARTITIONED_TIMER(OMP_single);
    }

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team     = this_thr -> th.th_team;
    int tid = __kmp_tid_from_gtid( global_tid );

    if (rc) {
        if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
                team->t.ompt_team_info.microtask);
        }
    } else {
        if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
        this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
    }
#endif

    __kmp_exit_single( global_tid );
    KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team     = this_thr -> th.th_team;
    int tid = __kmp_tid_from_gtid( global_tid );

    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif
    KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
        ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_loop_end)(
            team_info->parallel_id, task_info->task_id);
    }
#endif

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_pdo, loc );
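/* The ompc_* and kmpc_* wrappers below are the C entry points behind the
   omp_set_* / kmp_set_* user API: each one just forwards to the corresponding
   __kmp_* helper for the calling thread. */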
void
ompc_set_num_threads( int arg )
{
    __kmp_set_num_threads( arg, __kmp_entry_gtid() );
}

void
ompc_set_dynamic( int flag )
{
    kmp_info_t *thread;

    thread = __kmp_entry_thread();

    __kmp_save_internal_controls( thread );

    set__dynamic( thread, flag ? TRUE : FALSE );
}

void
ompc_set_nested( int flag )
{
    kmp_info_t *thread;

    thread = __kmp_entry_thread();

    __kmp_save_internal_controls( thread );

    set__nested( thread, flag ? TRUE : FALSE );
}

void
ompc_set_max_active_levels( int max_active_levels )
{
    __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
}

void
ompc_set_schedule( omp_sched_t kind, int modifier )
{
    __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
}

int
ompc_get_ancestor_thread_num( int level )
{
    return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
}

int
ompc_get_team_size( int level )
{
    return __kmp_get_team_size( __kmp_entry_gtid(), level );
}

void
kmpc_set_stacksize( int arg )
{
    __kmp_aux_set_stacksize( arg );
}

void
kmpc_set_stacksize_s( size_t arg )
{
    __kmp_aux_set_stacksize( arg );
}

void
kmpc_set_blocktime( int arg )
{
    int gtid, tid;
    kmp_info_t *thread;

    gtid = __kmp_entry_gtid();
    tid = __kmp_tid_from_gtid(gtid);
    thread = __kmp_thread_from_gtid(gtid);

    __kmp_aux_set_blocktime( arg, thread, tid );
}

void
kmpc_set_library( int arg )
{
    __kmp_user_set_library( (enum library_type)arg );
}

void
kmpc_set_defaults( char const * str )
{
    __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
}

void
kmpc_set_disp_num_buffers( int arg )
{
    // ignored after serial initialization because some teams have already
    // allocated dispatch buffers
    if( __kmp_init_serial == 0 && arg > 0 )
        __kmp_dispatch_num_buffers = arg;
}

int
kmpc_set_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_set_affinity_mask_proc( proc, mask );
#endif
}

int
kmpc_unset_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_unset_affinity_mask_proc( proc, mask );
#endif
}

int
kmpc_get_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_get_affinity_mask_proc( proc, mask );
#endif
}
    KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));

    data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid );
        }
    }

    if (didit) *data_ptr = cpy_data;

    /* This barrier is not a barrier region boundary */
    __kmp_threads[gtid]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );

    if (! didit) (*cpy_func)( cpy_data, *data_ptr );

    /* Consider next barrier the user code */
    __kmp_threads[gtid]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
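/* Lock API.  Without dynamic locks the entry points dispatch on
   __kmp_user_lock_kind and may store small TAS/futex locks directly inside the
   omp_lock_t; with KMP_USE_DYNAMIC_LOCK the lock word carries a tag that
   selects the implementation at run time. */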
#define INIT_LOCK                 __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK          __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK              __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED        __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK       __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK              __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK       __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK                 __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK          __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK              __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK       __kmp_destroy_nested_user_lock_with_checks

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void
__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
{
    if (KMP_IS_D_LOCK(seq)) {
        KMP_INIT_D_LOCK(lock, seq);

        __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
    } else {
        KMP_INIT_I_LOCK(lock, seq);

        kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
        __kmp_itt_lock_creating(ilk->lock, loc);
    }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
{
    // Don't have nested lock implementation for speculative locks
    if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
        seq = __kmp_user_lock_seq;

    switch (seq) {
    case lockseq_tas:     seq = lockseq_nested_tas;     break;
#if KMP_USE_FUTEX
    case lockseq_futex:   seq = lockseq_nested_futex;   break;
#endif
    case lockseq_ticket:  seq = lockseq_nested_ticket;  break;
    case lockseq_queuing: seq = lockseq_nested_queuing; break;
    case lockseq_drdpa:   seq = lockseq_nested_drdpa;   break;
    default:              seq = lockseq_nested_queuing;
    }
    KMP_INIT_I_LOCK(lock, seq);

    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
}
/* initialize the lock with a hint */
void
__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
{
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
    }

    __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
}

/* initialize the nested lock with a hint */
void
__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
{
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
    }

    __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
}
#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void
__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_lock");
    }
    __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#else // KMP_USE_DYNAMIC_LOCK

    static char const * const func = "omp_init_lock";
    kmp_user_lock_p lck;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
    }

    INIT_LOCK( lck );
    __kmp_set_user_lock_location( lck, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_creating( lck );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the nested lock */
void
__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
    }
    __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#else // KMP_USE_DYNAMIC_LOCK

    static char const * const func = "omp_init_nest_lock";
    kmp_user_lock_p lck;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
    }

    INIT_NESTED_LOCK( lck );
    __kmp_set_user_lock_location( lck, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_creating( lck );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

/* destroy the lock */
void
__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
        lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
        lck = (kmp_user_lock_p)user_lock;
    }
    __kmp_itt_lock_destroyed(lck);

    KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_destroyed( lck );

    DESTROY_LOCK( lck );

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        ;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        ;
    }
    else {
        __kmp_user_lock_free( user_lock, gtid, lck );
    }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nested lock */
void
__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
    __kmp_itt_lock_destroyed(ilk->lock);

    KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_destroyed( lck );

    DESTROY_NESTED_LOCK( lck );

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        ;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
        ;
    }
    else {
        __kmp_user_lock_free( user_lock, gtid, lck );
    }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void
__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    int tag = KMP_EXTRACT_D_TAG(user_lock);

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

# if KMP_USE_INLINED_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
    } else
# elif KMP_USE_INLINED_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
    } else
# endif
    {
        __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
    }

    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
    }

    __kmp_itt_lock_acquiring( lck );

    ACQUIRE_LOCK( lck, gtid );

    __kmp_itt_lock_acquired( lck );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_set_lock

void
__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

    KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);

    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_TRACE
    // missing support here: need to know whether the lock was acquired first or not
#endif

#else // KMP_USE_DYNAMIC_LOCK

    int acquire_status;
    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
    }

    __kmp_itt_lock_acquiring( lck );

    ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );

    __kmp_itt_lock_acquired( lck );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
            if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
                ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
        } else {
            if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
                ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
        }
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_set_nest_lock

void
__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK

    int tag = KMP_EXTRACT_D_TAG(user_lock);

    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

# if KMP_USE_INLINED_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        KMP_RELEASE_TAS_LOCK(user_lock, gtid);
    } else
# elif KMP_USE_INLINED_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
    } else
# endif
    {
        __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // "fast" path for the common TAS case
        __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );

        TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
        return;
#else
        lck = (kmp_user_lock_p)user_lock;
#endif
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
    }

    __kmp_itt_lock_releasing( lck );

    RELEASE_LOCK( lck, gtid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_unset_lock

/* release the nested lock */
void
__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK

    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

    KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // "fast" path for the common TAS case
        kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;

        __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );

        if ( --(tl->lk.depth_locked) == 0 ) {
            TCW_4(tl->lk.poll, 0);
        }
        return;
#else
        lck = (kmp_user_lock_p)user_lock;
#endif
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
    }

    __kmp_itt_lock_releasing( lck );

    int release_status;
    release_status = RELEASE_NESTED_LOCK( lck, gtid );
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled) {
        if (release_status == KMP_LOCK_RELEASED) {
            if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
                ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
                    (uint64_t) lck);
            }
        } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
            ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
                (uint64_t) lck);
        }
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_unset_nest_lock

/* try to acquire the lock */
int
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK

    int rc;
    int tag = KMP_EXTRACT_D_TAG(user_lock);

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

# if KMP_USE_INLINED_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
    } else
# elif KMP_USE_INLINED_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
    } else
# endif
    {
        rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
    }
    if (rc) {
        __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
    } else {
        __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    }

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;
    int rc;

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
    }

    __kmp_itt_lock_acquiring( lck );

    rc = TEST_LOCK( lck, gtid );

    if ( rc ) {
        __kmp_itt_lock_acquired( lck );
    } else {
        __kmp_itt_lock_cancelled( lck );
    }

    return ( rc ? FTN_TRUE : FTN_FALSE );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_test_lock

/* try to acquire the nested lock */
int
__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
    int rc;

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

    rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);

    if (rc) {
        __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
    } else {
        __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    }
    return rc;

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;
    int rc;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
    }

    __kmp_itt_lock_acquiring( lck );

    rc = TEST_NESTED_LOCK( lck, gtid );

    if ( rc ) {
        __kmp_itt_lock_acquired( lck );
    } else {
        __kmp_itt_lock_cancelled( lck );
    }
    return rc;

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_test_nest_lock

// keep the selected reduction method in a thread-local structure for cross-function usage

#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
        ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )

#define __KMP_GET_REDUCTION_METHOD(gtid) \
        ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
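/* Reduction support.  __kmp_determine_reduction_method packs the chosen
   strategy (critical section, atomics, or a tree barrier) into a per-thread
   word; the __kmpc_reduce* entry points below dispatch on that choice. */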
static __forceinline void
__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {

    // this lock is used for an internal purpose only, but it is still visible
    // as a serial overhead span to the threading profile tool
    kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
        __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
    }

    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        lck = (kmp_user_lock_p)lk;
        KMP_DEBUG_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
        KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    } else {
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
        lck = ilk->lock;
        KMP_DEBUG_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    // The fast reduction code is emitted with 32-byte critical sections;
    // if there isn't enough space we have to use a pointer.
    if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
        lck = (kmp_user_lock_p)crit;
    }
    else {
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }
    KMP_DEBUG_ASSERT( lck != NULL );

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    __kmp_acquire_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_enter_critical_section_reduce_block

static __forceinline void
__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {

    kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        if (__kmp_env_consistency_check)
            __kmp_pop_sync(global_tid, ct_critical, loc);
        KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        if (__kmp_env_consistency_check)
            __kmp_pop_sync(global_tid, ct_critical, loc);
        KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    // The fast reduction code is emitted with 32-byte critical sections;
    // if there isn't enough space a pointer was stored instead.
    if ( __kmp_base_user_lock_size > 32 ) {
        lck = *( (kmp_user_lock_p *) crit );
        KMP_ASSERT( lck != NULL );
    } else {
        lck = (kmp_user_lock_p) crit;
    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

    __kmp_release_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
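/* __kmpc_reduce_nowait is the 'nowait' form used at the end of a reduction:
   it returns 1 when the caller should combine its private copies itself,
   2 when the combination is done with atomics, and 0 when this thread has
   nothing left to do. */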
kmp_int32
__kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck ) {

    int retval = 0;
    PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
    kmp_team_t *team;
    kmp_info_t *th;
    int teams_swapped = 0, task_state;
#endif

    KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );

    // The reduction clause can not be a stand-alone directive, so the runtime
    // should already be initialized; keep the lazy initialization anyway.
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    // check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif

#if OMP_40_ENABLED
    th = __kmp_thread_from_gtid(global_tid);
    if( th->th.th_teams_microtask ) {   // are we inside the teams construct?
        team = th->th.th_team;
        if( team->t.t_level == th->th.th_teams_level ) {
            // this is a reduction at the teams construct
            KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
            // swap teams temporarily for the reduction barrier
            teams_swapped = 1;
            th->th.th_info.ds.ds_tid = team->t.t_master_tid;
            th->th.th_team = team->t.t_parent;
            th->th.th_team_nproc = th->th.th_team->t.t_nproc;
            th->th.th_task_team = th->th.th_team->t.t_task_team[0];
            task_state = th->th.th_task_state;
            th->th.th_task_state = 0;
        }
    }
#endif // OMP_40_ENABLED

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // team size == 1: no synchronization is required

    } else if( packed_reduction_method == atomic_reduce_block ) {

        // the checking block is closed here because the atomic operation
        // is executed by the caller right after this call returns
        if ( __kmp_env_consistency_check )
            __kmp_pop_sync( global_tid, ct_reduce, loc );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        // all workers except the master do the pop here
        if ( __kmp_env_consistency_check ) {
            __kmp_pop_sync( global_tid, ct_reduce, loc );
        }
    }

#if OMP_40_ENABLED
    if( teams_swapped ) {
        // restore the thread structure
        th->th.th_info.ds.ds_tid = 0;
        th->th.th_team = team;
        th->th.th_team_nproc = team->t.t_nproc;
        th->th.th_task_team = team->t.t_task_team[task_state];
        th->th.th_task_state = task_state;
    }
#endif

    KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );

    return retval;
}
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // team size == 1: no synchronization was required

    } else if( packed_reduction_method == atomic_reduce_block ) {

        // nothing to do here

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        // only the master thread gets here

    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
kmp_int32
__kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck )
{
    int retval = 0;
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );

    // The reduction clause can not be a stand-alone directive, so the runtime
    // should already be initialized; keep the lazy initialization anyway.
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    // check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // team size == 1: no synchronization is required

    } else if( packed_reduction_method == atomic_reduce_block ) {

        // the atomic reduction is performed by the caller

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        // all workers except the master do the pop here
        if ( __kmp_env_consistency_check ) {
            __kmp_pop_sync( global_tid, ct_reduce, loc );
        }
    }

    KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );

    return retval;
}
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    // this barrier is the terminating barrier of the construct when NOWAIT is not specified

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

        __kmp_threads[global_tid]->th.th_ident = loc;

        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == empty_reduce_block ) {

        // team size == 1: no synchronization was required, but the barrier is still needed

        __kmp_threads[global_tid]->th.th_ident = loc;

        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == atomic_reduce_block ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        // only the master executes here (the final part of the split barrier)
        __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );

    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* returns the kmp_taskid_t of the current task */
kmp_uint64
__kmpc_get_taskid() {

    kmp_int32    gtid;
    kmp_info_t * thread;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    }
    thread = __kmp_thread_from_gtid( gtid );
    return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64
__kmpc_get_parent_taskid() {

    kmp_int32        gtid;
    kmp_info_t *     thread;
    kmp_taskdata_t * parent_task;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    }
    thread      = __kmp_thread_from_gtid( gtid );
    parent_task = thread->th.th_current_task->td_parent;
    return ( parent_task == NULL ? 0 : parent_task->td_task_id );

} // __kmpc_get_parent_taskid
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();
    }
    __kmp_place_num_sockets = nS;
    __kmp_place_socket_offset = sO;
    __kmp_place_num_cores = nC;
    __kmp_place_core_offset = cO;
    __kmp_place_num_threads_per_core = nT;
}
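/* Doacross (OpenMP 4.5 "ordered depend") support: __kmpc_doacross_init stores
   the per-dimension bounds and a shared flag array in the dispatch buffer,
   __kmpc_doacross_wait spins until a depended-on source iteration is flagged,
   and __kmpc_doacross_post flags the caller's own iteration as finished. */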
void
__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
{
    int j, idx;
    kmp_int64 last, trace_count;
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_uint32 *flags;
    kmp_disp_t *pr_buf = th->th.th_dispatch;
    dispatch_shared_info_t *sh_buf;

    KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
                 gtid, num_dims, !team->t.t_serialized));
    KMP_DEBUG_ASSERT(dims != NULL);
    KMP_DEBUG_ASSERT(num_dims > 0);

    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
        return; // no dependencies if the team is serialized
    }
    KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
    idx = pr_buf->th_doacross_buf_idx++; // advance to the shared buffer for the next loop
    sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

    // Save bounds info into the allocated private buffer
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
    pr_buf->th_doacross_info =
        (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
    pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions
    // also keep the address of num_done so it can be reached without the buffer index
    pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
    pr_buf->th_doacross_info[2] = dims[0].lo;
    pr_buf->th_doacross_info[3] = dims[0].up;
    pr_buf->th_doacross_info[4] = dims[0].st;
    last = 5;
    for( j = 1; j < num_dims; ++j ) {
        kmp_int64 range_length; // ranges of all dimensions but dims[0] are kept
        if( dims[j].st == 1 ) { // most common case
            range_length = dims[j].up - dims[j].lo + 1;
        } else {
            if( dims[j].st > 0 ) {
                KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
                range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
            } else {            // negative increment
                KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
                range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
            }
        }
        pr_buf->th_doacross_info[last++] = range_length;
        pr_buf->th_doacross_info[last++] = dims[j].lo;
        pr_buf->th_doacross_info[last++] = dims[j].up;
        pr_buf->th_doacross_info[last++] = dims[j].st;
    }

    // Compute the total trip count, starting with the range of dims[0]
    if( dims[0].st == 1 ) { // most common case
        trace_count = dims[0].up - dims[0].lo + 1;
    } else if( dims[0].st > 0 ) {
        KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
        trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
    } else {   // negative increment
        KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
        trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
    }
    for( j = 1; j < num_dims; ++j ) {
        trace_count *= pr_buf->th_doacross_info[4 * j + 1];  // use the kept ranges
    }
    KMP_DEBUG_ASSERT(trace_count > 0);

    // Check if the shared buffer is still occupied by another loop
    if( idx != sh_buf->doacross_buf_idx ) {
        // the shared buffer is occupied, wait for it to be free
        __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
    }
    // After the CAS the first thread gets NULL, others get 1 while the
    // initialization is in progress, or the allocated pointer otherwise.
    flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
        (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1);
    if( flags == NULL ) {
        // we are the first thread, allocate the array of flags
        kmp_int64 size = trace_count / 8 + 8; // in bytes, one bit per iteration
        sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
    } else if( (kmp_int64)flags == 1 ) {
        // initialization is still in progress, need to wait
        while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) {
            KMP_YIELD(TRUE);
        }
    }
    KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check the pointer value
    pr_buf->th_doacross_flags = sh_buf->doacross_flags;      // keep a private copy

    KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void
__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
{
    kmp_int32 shft, num_dims, i;
    kmp_uint32 flag;
    kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_disp_t *pr_buf;
    kmp_int64 lo, up, st;

    KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
        return; // no dependencies if the team is serialized
    }

    // Calculate the sequential iteration number and check for out-of-bounds dependences
    pr_buf = th->th.th_dispatch;
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
    num_dims = pr_buf->th_doacross_info[0];
    lo = pr_buf->th_doacross_info[2];
    up = pr_buf->th_doacross_info[3];
    st = pr_buf->th_doacross_info[4];
    if( st == 1 ) { // most common case
        if( vec[0] < lo || vec[0] > up ) {
            KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                         gtid, vec[0], lo, up));
            return;
        }
        iter_number = vec[0] - lo;
    } else if( st > 0 ) {
        if( vec[0] < lo || vec[0] > up ) {
            KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                         gtid, vec[0], lo, up));
            return;
        }
        iter_number = (kmp_uint64)(vec[0] - lo) / st;
    } else {        // negative increment
        if( vec[0] > lo || vec[0] < up ) {
            KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                         gtid, vec[0], lo, up));
            return;
        }
        iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
    }
    for( i = 1; i < num_dims; ++i ) {
        kmp_int64 iter, ln;
        kmp_int32 j = i * 4;
        ln = pr_buf->th_doacross_info[j + 1];
        lo = pr_buf->th_doacross_info[j + 2];
        up = pr_buf->th_doacross_info[j + 3];
        st = pr_buf->th_doacross_info[j + 4];
        if( st == 1 ) {
            if( vec[i] < lo || vec[i] > up ) {
                KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                             gtid, vec[i], lo, up));
                return;
            }
            iter = vec[i] - lo;
        } else if( st > 0 ) {
            if( vec[i] < lo || vec[i] > up ) {
                KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                             gtid, vec[i], lo, up));
                return;
            }
            iter = (kmp_uint64)(vec[i] - lo) / st;
        } else {    // negative increment
            if( vec[i] > lo || vec[i] < up ) {
                KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                             gtid, vec[i], lo, up));
                return;
            }
            iter = (kmp_uint64)(lo - vec[i]) / (-st);
        }
        iter_number = iter + ln * iter_number;
    }
    shft = iter_number % 32; // use 32-bit granularity
    iter_number >>= 5;       // divide by 32
    flag = 1 << shft;
    while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
        KMP_YIELD(TRUE);
    }
    KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
                 gtid, (iter_number<<5)+shft));
}
void
__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
{
    kmp_int32 shft, num_dims, i;
    kmp_uint32 flag;
    kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_disp_t *pr_buf;
    kmp_int64 lo, st;

    KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
        return; // no dependencies if the team is serialized
    }

    // Calculate the sequential iteration number (same as in "wait", but without bounds checks)
    pr_buf = th->th.th_dispatch;
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
    num_dims = pr_buf->th_doacross_info[0];
    lo = pr_buf->th_doacross_info[2];
    st = pr_buf->th_doacross_info[4];
    if( st == 1 ) { // most common case
        iter_number = vec[0] - lo;
    } else if( st > 0 ) {
        iter_number = (kmp_uint64)(vec[0] - lo) / st;
    } else {        // negative increment
        iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
    }
    for( i = 1; i < num_dims; ++i ) {
        kmp_int64 iter, ln;
        kmp_int32 j = i * 4;
        ln = pr_buf->th_doacross_info[j + 1];
        lo = pr_buf->th_doacross_info[j + 2];
        st = pr_buf->th_doacross_info[j + 4];
        if( st == 1 ) {
            iter = vec[i] - lo;
        } else if( st > 0 ) {
            iter = (kmp_uint64)(vec[i] - lo) / st;
        } else {    // negative increment
            iter = (kmp_uint64)(lo - vec[i]) / (-st);
        }
        iter_number = iter + ln * iter_number;
    }
    shft = iter_number % 32; // use 32-bit granularity
    iter_number >>= 5;       // divide by 32
    flag = 1 << shft;
    if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
        KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
    KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
                 gtid, (iter_number<<5)+shft));
}
void
__kmpc_doacross_fini(ident_t *loc, int gtid)
{
    kmp_int64 num_done;
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_disp_t *pr_buf = th->th.th_dispatch;

    KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
        return; // nothing to do
    }
    num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
    if( num_done == th->th.th_team_nproc ) {
        // we are the last thread, need to free the shared resources
        int idx = pr_buf->th_doacross_buf_idx - 1;
        dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
        KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
        KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
        KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
        __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
        sh_buf->doacross_flags = NULL;
        sh_buf->doacross_num_done = 0;
        sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free the buffer for re-use
    }
    // free the private resources (the buffer index is kept)
    __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
    pr_buf->th_doacross_info = NULL;
    KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}