#if defined(_WIN32_WINNT) && defined(_M_IX86)
#define _WIN32_WINNT 0x0502
#endif

#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h> // _control87() is used below to widen x87 precision
#endif
#include "ompt-specific.h"

#if KMP_STATIC_STEAL_ENABLED

// Typed counterpart of dispatch_private_info{32,64}_t, used when static
// stealing is enabled.
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  T static_steal_counter; // for static_steal only; bumped once per loop init
  // KMP_ALIGN(32) keeps parm1-4 in the same cache line, since they are
  // always used together.
  struct KMP_ALIGN(32) {
#else /* KMP_STATIC_STEAL_ENABLED */

// Same descriptor without the static-steal field when stealing is disabled.
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
template <typename T> struct KMP_ALIGN_CACHE dispatch_private_info_template {
  // duplicate alignment here, otherwise the structure size does not match
  // dispatch_private_info
  union KMP_ALIGN_CACHE private_info_tmpl {
    dispatch_private_infoXX_template<T> p;
    dispatch_private_info64_t p64;
  } u;
  kmp_uint32 ordered_bumped;
  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; // to retain the structure size
  dispatch_private_info *next; /* stack of buffers for nest of serial regions */
  kmp_uint32 type_size;
  enum cons_type pushed_ws;
};
// Typed counterpart of dispatch_shared_info{32,64}_t.
template <typename UT> struct dispatch_shared_infoXX_template {
  volatile UT iteration;
  volatile UT num_done;
  volatile UT ordered_iteration;
  // to retain the structure size making ordered_iteration scalar
  UT ordered_dummy[KMP_MAX_ORDERED - 3];
};
// Typed counterpart of dispatch_shared_info_t.
template <typename UT> struct dispatch_shared_info_template {
  // we need a union here to keep the structure size
  union shared_info_tmpl {
    dispatch_shared_infoXX_template<UT> s;
    dispatch_shared_info64_t s64;
  } u;
  volatile kmp_uint32 buffer_index;
  volatile kmp_int32 doacross_buf_idx; // teamwise index
  kmp_uint32 *doacross_flags; // array of iteration flags (0/1)
  kmp_int32 doacross_num_done; // count finished threads
};
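// Illustration (not part of the original source): how these two descriptors
// are used further down in this file.  Each thread keeps a private
// dispatch_private_info_template in th_dispatch->th_disp_buffer[], while the
// team shares a ring of dispatch_shared_info_template objects in
// team->t.t_disp_buffer[].  A loop instance picks slot
// my_buffer_index % __kmp_dispatch_num_buffers, spins in __kmp_dispatch_init()
// until sh->buffer_index catches up with my_buffer_index, and the last thread
// to drain the loop advances sh->buffer_index by __kmp_dispatch_num_buffers in
// __kmp_dispatch_next(), recycling the slot for a later loop.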
#undef USE_TEST_LOCKS

// test_then_add template (general template should NOT be used)
template <typename T> static __forceinline T test_then_add(volatile T *p, T d);

template <>
__forceinline kmp_int32 test_then_add<kmp_int32>(volatile kmp_int32 *p,
                                                 kmp_int32 d) {
  kmp_int32 r;
  r = KMP_TEST_THEN_ADD32(p, d);
  return r;
}

template <>
__forceinline kmp_int64 test_then_add<kmp_int64>(volatile kmp_int64 *p,
                                                 kmp_int64 d) {
  kmp_int64 r;
  r = KMP_TEST_THEN_ADD64(p, d);
  return r;
}
// test_then_inc_acq template (general template should NOT be used)
template <typename T> static __forceinline T test_then_inc_acq(volatile T *p);

template <>
__forceinline kmp_int32 test_then_inc_acq<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC_ACQ32(p);
  return r;
}

template <>
__forceinline kmp_int64 test_then_inc_acq<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC_ACQ64(p);
  return r;
}
// test_then_inc template (general template should NOT be used)
template <typename T> static __forceinline T test_then_inc(volatile T *p);

template <>
__forceinline kmp_int32 test_then_inc<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC32(p);
  return r;
}

template <>
__forceinline kmp_int64 test_then_inc<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC64(p);
  return r;
}
// compare_and_swap template (general template should NOT be used)
template <typename T>
static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s);

template <>
__forceinline kmp_int32 compare_and_swap<kmp_int32>(volatile kmp_int32 *p,
                                                    kmp_int32 c, kmp_int32 s) {
  return KMP_COMPARE_AND_STORE_REL32(p, c, s);
}

template <>
__forceinline kmp_int32 compare_and_swap<kmp_int64>(volatile kmp_int64 *p,
                                                    kmp_int64 c, kmp_int64 s) {
  return KMP_COMPARE_AND_STORE_REL64(p, c, s);
}
/* Spin-wait loop that first pauses, then yields.  Waits until the predicate
   returns non-zero when called with *spinner and checker.  Does NOT put the
   thread to sleep. */
template <typename UT>
static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
                           kmp_uint32 (*pred)(UT, UT)
                               USE_ITT_BUILD_ARG(void *obj)) {
  // note: we may not belong to a team at this point
  volatile UT *spin = spinner;
  UT check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(UT, UT) = pred;
  UT r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = *spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
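// Illustration (not part of the original source): typical callers later in
// this file pass one of the comparison templates below as the predicate, e.g.
//   __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
//                                __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
// in __kmp_dispatch_init(), and __kmp_ge<UT> against ordered_iteration in the
// ordered-entry code (__kmp_dispatch_deo / __kmp_dispatch_finish).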
template <typename UT> static kmp_uint32 __kmp_eq(UT value, UT checker) {
  return value == checker;
}

template <typename UT> static kmp_uint32 __kmp_neq(UT value, UT checker) {
  return value != checker;
}

template <typename UT> static kmp_uint32 __kmp_lt(UT value, UT checker) {
  return value < checker;
}

template <typename UT> static kmp_uint32 __kmp_ge(UT value, UT checker) {
  return value >= checker;
}

template <typename UT> static kmp_uint32 __kmp_le(UT value, UT checker) {
  return value <= checker;
}
static void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref,
                                     ident_t *loc_ref) {
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(gtid_ref);

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_root->r.r_active &&
        (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }
}
template <typename UT>
static void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    UT lower;

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }
    lower = pr->u.p.ordered_lower;

#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d before wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }

    __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                         __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

    {
      char *buff;
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d after wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }
  }
  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid));
}
static void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref,
                                     ident_t *loc_ref) {
  kmp_info_t *th;

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
      __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
    }
  }
}
template <typename UT>
static void __kmp_dispatch_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
      __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref);
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }

    KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped != 0) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    pr->ordered_bumped += 1;

    KD_TRACE(1000,
             ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
              gtid, pr->ordered_bumped));

    /* TODO use general release procedure? */
    test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
  }
  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid));
}
/* Computes and returns x to the power of y, where y must be non-negative and
   x must be in the (0, 1) open interval (square-and-multiply evaluation). */
template <typename UT>
static __forceinline long double __kmp_pow(long double x, UT y) {
  long double s = 1.0L;

  KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
  // y is unsigned, so no need to assert y >= 0
  while (y) {
    if (y & 1)
      s *= x;
    x *= x;
    y >>= 1;
  }
  return s;
}
/* Computes and returns the number of unassigned iterations after idx chunks
   have been handed out under guided scheduling with the given base.  Note: on
   Windows* OS on IA-32 the caller widens the x87 FPCW so that the long double
   arithmetic below actually has 64-bit precision. */
template <typename T>
static __inline typename traits_t<T>::unsigned_t
__kmp_dispatch_guided_remaining(T tc, typename traits_t<T>::floating_t base,
                                typename traits_t<T>::unsigned_t idx) {
  typedef typename traits_t<T>::unsigned_t UT;

  long double x = tc * __kmp_pow<UT>(base, idx);
  UT r = (UT)x;
  if (x == r)
    return r;
  return r + 1;
}
// Tuning knobs for the guided-iterative algorithm (see their use in
// __kmp_dispatch_init below).
static int guided_int_param = 2;
static double guided_flt_param = 0.5; // = 1 / ( 2 * guided_int_param )
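// Illustration (not part of the original source): these two knobs feed the
// guided-iterative setup below, where
//   pr->u.p.parm2             = guided_int_param * nproc * (chunk + 1)
//   *(double *)&pr->u.p.parm3 = guided_flt_param / nproc
// With, say, nproc = 8 and chunk = 4 that gives parm2 = 2 * 8 * 5 = 80 (the
// remaining-iteration threshold at which the schedule degenerates to plain
// dynamic chunks) and parm3 = 0.0625, so each grab in __kmp_dispatch_next()
// takes roughly remaining * 0.0625 iterations, shrinking geometrically.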
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;

  int active;
  T tc;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<UT> volatile *sh;

  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;

  kmp_uint64 cur_chunk = chunk;
  int itt_need_metadata_reporting = __itt_metadata_add_ptr &&
                                    __kmp_forkjoin_frames_mode == 3 &&
                                    KMP_MASTER_GTID(gtid) &&
                                    th->th.th_teams_microtask == NULL &&
                                    team->t.t_active_level == 1;
  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
  }

#if (KMP_STATIC_STEAL_ENABLED)
  if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    // there is only one implementation of stealing, so use it
    schedule = kmp_sch_static_steal;
  else
#endif
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

  pr->type_size = traits_t<T>::type_size; // remember the size of variables
  if (schedule == kmp_sch_static) {
    schedule = __kmp_static;
  } else {
    if (schedule == kmp_sch_runtime) {
      // use the schedule and chunk size specified by OMP_SCHEDULE
      schedule = team->t.t_sched.r_sched_type;
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      } else if (schedule == kmp_sch_static) {
        schedule = __kmp_static;
      }
      chunk = team->t.t_sched.chunk;
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
            traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
    } else {
      if (schedule == kmp_sch_guided_chunked)
        schedule = __kmp_guided;
      if (chunk <= 0)
        chunk = KMP_DEFAULT_CHUNK;
    }

    if (schedule == kmp_sch_auto) {
      // mapping done in __kmp_do_serial_initialize()
      schedule = __kmp_auto;
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init: kmp_sch_auto: T#%%d new: "
                                "schedule:%%d chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
    }

    /* guided analytical not safe for too many threads */
    if (schedule == kmp_sch_guided_analytical_chunked &&
        th->th.th_team_nproc > 1 << 20) {
      schedule = kmp_sch_guided_iterative_chunked;
      KMP_WARNING(DispatchManyThreads);
    }
    if (schedule == kmp_sch_runtime_simd) {
      // compiler provides simd_width in the chunk parameter
      schedule = team->t.t_sched.r_sched_type;
      if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
          schedule == __kmp_static) {
        schedule = kmp_sch_static_balanced_chunked;
      } else {
        if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided)
          schedule = kmp_sch_guided_simd;
        chunk = team->t.t_sched.chunk * chunk;
      }
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
                                " chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
    }
    pr->u.p.parm1 = chunk;
  }
  KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
              "unknown scheduling type");
  if (__kmp_env_consistency_check) {
    if (st == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                            (pr->ordered ? ct_pdo_ordered : ct_pdo), loc);
    }
  }
  // compute trip count (cast to unsigned: the difference may overflow T)
  if (st == 1) {
    tc = (ub >= lb) ? ub - lb + 1 : 0;
  } else if (st < 0) {
    tc = (lb >= ub) ? (UT)(lb - ub) / (-st) + 1 : 0;
  } else {
    tc = (ub >= lb) ? (UT)(ub - lb) / st + 1 : 0;
  }

  if (schedule == __kmp_static) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_COUNT_VALUE(FOR_static_iterations, tc);
  } else {
    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
    KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
  }

  pr->u.p.lb = lb;
  pr->u.p.ub = ub;
  pr->u.p.st = st;
  pr->u.p.tc = tc;
#if KMP_OS_WINDOWS
  pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */

  /* NOTE: only the active parallel region(s) has active ordered sections */
  if (active) {
    if (pr->ordered == 0) {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
      pr->ordered_bumped = 0;
      pr->u.p.ordered_lower = 1;
      pr->u.p.ordered_upper = 0;
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
    }
  }

  if (__kmp_env_consistency_check) {
    enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
    if (push_ws) {
      __kmp_push_workshare(gtid, ws, loc);
      pr->pushed_ws = ws;
    } else {
      __kmp_check_workshare(gtid, ws, loc);
      pr->pushed_ws = ct_none;
    }
  }

  switch (schedule) {
#if (KMP_STATIC_STEAL_ENABLED)
  case kmp_sch_static_steal: {
    T nproc = th->th.th_team_nproc;
    T ntc, init;

    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid));

    ntc = (tc % chunk ? 1 : 0) + tc / chunk;
    if (nproc > 1 && ntc >= nproc) {
      T id = __kmp_tid_from_gtid(gtid);
      T small_chunk, extras;

      small_chunk = ntc / nproc;
      extras = ntc % nproc;

      init = id * small_chunk + (id < extras ? id : extras);
      pr->u.p.count = init;
      pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);

      pr->u.p.parm2 = lb;
      // parm3 is not used by static_steal
      pr->u.p.parm4 = (id + 1) % nproc; // remember the neighbour tid
      pr->u.p.st = st;
      if (traits_t<T>::type_size > 4) {
        // 8-byte induction variable: use a per-thread lock for stealing,
        // freed in __kmp_dispatch_next() when the loop completes.
        KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL);
        th->th.th_dispatch->th_steal_lock =
            (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
        __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
      }
      break;
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_balanced\n",
                     gtid));
      schedule = kmp_sch_static_balanced;
      /* too few chunks: fall through to kmp_sch_static_balanced */
    } // if
    /* FALL-THROUGH to static balanced */
  } // case
#endif
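  // Illustration (not part of the original source): with tc = 1000 iterations,
  // chunk = 10 and nproc = 4, ntc = 100 chunks, so small_chunk = 25 and
  // extras = 0; thread id initially owns chunks [id*25, id*25 + 25) and
  // records its right neighbour in parm4 as the first victim to try once its
  // own range is exhausted.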
  case kmp_sch_static_balanced: {
    T nproc = th->th.th_team_nproc;
    T init, limit;

    KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                   gtid));

    if (nproc > 1) {
      T id = __kmp_tid_from_gtid(gtid);

      if (tc < nproc) {
        if (id < tc) {
          init = id;
          limit = id;
          pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
        } else {
          pr->u.p.count = 1; /* means no more chunks to execute */
          pr->u.p.parm1 = FALSE;
          break;
        }
      } else {
        T small_chunk = tc / nproc;
        T extras = tc % nproc;
        init = id * small_chunk + (id < extras ? id : extras);
        limit = init + small_chunk - (id < extras ? 0 : 1);
        pr->u.p.parm1 = (id == nproc - 1);
      }
    } else {
      if (tc > 0) {
        init = 0;
        limit = tc - 1;
        pr->u.p.parm1 = TRUE;
      } else { // zero trip count
        pr->u.p.count = 1; /* means no more chunks to execute */
        pr->u.p.parm1 = FALSE;
        break;
      }
    }
    // calculate chunk for metadata report
    if (itt_need_metadata_reporting)
      cur_chunk = limit - init + 1;

    if (st == 1) {
      pr->u.p.lb = lb + init;
      pr->u.p.ub = lb + limit;
    } else {
      // calculated upper bound; "ub" is the user-defined upper bound
      T ub_tmp = lb + limit * st;
      pr->u.p.lb = lb + init * st;
      // adjust upper bound to "ub" if needed, so lastprivate matches exactly
      if (st > 0) {
        pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
      } else {
        pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
      }
    }
    if (pr->ordered) {
      pr->u.p.ordered_lower = init;
      pr->u.p.ordered_upper = limit;
    }
    break;
  } // case
  case kmp_sch_static_balanced_chunked: {
    // similar to balanced, but chunk adjusted to be a multiple of simd width
    T nth = th->th.th_team_nproc;
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d runtime(simd:static)"
                   " -> falling-through to static_greedy\n",
                   gtid));
    schedule = kmp_sch_static_greedy;
    if (nth > 1)
      pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
    else
      pr->u.p.parm1 = tc;
    break;
  } // case
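  // Illustration (not part of the original source): the expression above
  // rounds the per-thread share up to a multiple of the simd chunk.  With
  // tc = 1000, nth = 4 and chunk = 8: (1000 + 3) / 4 = 250, and
  // (250 + 7) & ~7 = 256, so every thread gets a 256-iteration block whose
  // size is a multiple of the simd width.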
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_simd: {
    T nproc = th->th.th_team_nproc;
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked"
                   " case\n",
                   gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        // when remaining iters become less than parm2 - switch to dynamic
        pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
        *(double *)&pr->u.p.parm3 =
            guided_flt_param / nproc; // may occupy parm3 and parm4
      }
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall through to kmp_sch_static_greedy */
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",
                     gtid));
      pr->u.p.parm1 = tc;
    } // if
  } // case
  break;
  case kmp_sch_guided_analytical_chunked: {
    T nproc = th->th.th_team_nproc;
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked"
              " case\n",
              gtid));
    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        DBL x;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* Windows* OS on IA-32 defaults to 53-bit precision; widen the x87
           control word to 64-bit for the solver below. */
        unsigned int oldFpcw = _control87(0, 0);
        _control87(_PC_64, _MCW_PC);
#endif
        /* value used for comparison in solver for cross-over point */
        long double target = ((long double)chunk * 2 + 1) * nproc / tc;

        /* crossover point: chunk indexes >= cross switch to dynamic-style
           scheduling */
        UT cross;

        /* commonly used term: (2 nproc - 1)/(2 nproc) */
        x = (long double)1.0 - (long double)0.5 / nproc;

        { // test natural alignment of parm3
          struct _test_a {
            char a;
            union {
              char b;
              DBL d;
            };
          } t;
          ptrdiff_t natural_alignment =
              (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
          KMP_DEBUG_ASSERT(
              (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
        }

        /* save the term in the thread-private dispatch structure */
        *(DBL *)&pr->u.p.parm3 = x;

        /* solve for the crossover point: smallest cross with
           x^cross <= target */
        {
          UT left, right, mid;
          long double p;

          right = 229; // initial positive estimate for the upper bound
          p = __kmp_pow<UT>(x, right);
          if (p > target) {
            do {
              p *= p;
              right <<= 1;
            } while (p > target && right < (1 << 27));
            left = right >> 1; // previous (failed) estimate of upper bound
          } else {
            left = 0;
          }

          /* bisection root-finding method */
          while (left + 1 < right) {
            mid = (left + right) / 2;
            if (__kmp_pow<UT>(x, mid) > target) {
              left = mid;
            } else {
              right = mid;
            }
          }
          cross = right;
        }
        /* assert sanity of the computed crossover point */
        KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
                   __kmp_pow<UT>(x, cross) <= target);

        /* save the crossover point in the thread-private dispatch structure */
        pr->u.p.parm2 = cross;

#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
#else
#define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
        /* dynamic-style scheduling offset */
        pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
                                 tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
                        cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        _control87(oldFpcw, _MCW_PC); // restore FPCW
#endif
      } // if (chunk not too large)
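      // Illustration (not part of the original source): with nproc = 4,
      // chunk = 8 and tc = 10000, x = 1 - 0.5/4 = 0.875 and
      // target = (2*8 + 1) * 4 / 10000 = 0.0068.  The solver above finds the
      // smallest cross with 0.875^cross <= 0.0068 (cross = 38 here), i.e. the
      // chunk index at which a guided chunk would shrink below the requested
      // minimum, so chunks with index >= parm2 are handed out dynamically
      // with the fixed size `chunk`.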
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall through to kmp_sch_static_greedy */
      pr->u.p.parm1 = tc;
    } // if
  } // case
  break;
  case kmp_sch_static_greedy:
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n", gtid));
    pr->u.p.parm1 = (th->th.th_team_nproc > 1)
                        ? (tc + th->th.th_team_nproc - 1) / th->th.th_team_nproc
                        : tc;
    break;
  case kmp_sch_static_chunked:
  case kmp_sch_dynamic_chunked:
    if (pr->u.p.parm1 <= 0) {
      pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
    }
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d "
                   "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
                   gtid));
    break;
  case kmp_sch_trapezoidal: {
    /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
    T parm1, parm2, parm3, parm4;
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid));

    parm1 = chunk;

    /* F : size of the first cycle */
    parm2 = (tc / (2 * th->th.th_team_nproc));
    if (parm2 < 1) {
      parm2 = 1;
    }

    /* L : size of the last cycle; must not be larger than the first cycle */
    if (parm1 < 1) {
      parm1 = 1;
    } else if (parm1 > parm2) {
      parm1 = parm2;
    }

    /* N : number of cycles */
    parm3 = (parm2 + parm1);
    parm3 = (2 * tc + parm3 - 1) / parm3;
    if (parm3 < 2) {
      parm3 = 2;
    }

    /* sigma : decreasing increment of the trapezoid */
    parm4 = (parm3 - 1);
    parm4 = (parm2 - parm1) / parm4;

    pr->u.p.parm1 = parm1;
    pr->u.p.parm2 = parm2;
    pr->u.p.parm3 = parm3;
    pr->u.p.parm4 = parm4;
  } // case
  break;
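  // Illustration (not part of the original source): with tc = 1000, 4 threads
  // and chunk = 1, parm2 = 1000 / 8 = 125 (first chunk), parm1 = 1 (last
  // chunk), parm3 = (2000 + 125) / 126 = 16 chunks in total and
  // parm4 = (125 - 1) / 15 = 8, so successive chunks are 125, 117, 109, ...
  // shrinking by 8 iterations each until the minimum size is reached.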
  default: {
    __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
                KMP_HNT(GetNewerLibrary), // Hint
                __kmp_msg_null // Variadic argument list terminator
                );
  } break;
  } // switch
  pr->schedule = schedule;
  if (active) {
    /* The name of this buffer should be my_buffer_index when it's free to
       use it */
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
    __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                                 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
    // Note: buffer index and my_buffer_index are *always* 32-bit integers.
    KMP_MB();
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));

    th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);

    if (pr->ordered) {
      __kmp_itt_ordered_init(gtid);
    }
    // Report loop metadata (only by master of active team at level 1)
    if (itt_need_metadata_reporting) {
      kmp_uint64 schedtype = 0;
      switch (schedule) {
      case kmp_sch_static_chunked:
      case kmp_sch_static_balanced: // Chunk is calculated in the switch above
        break;
      case kmp_sch_static_greedy:
        cur_chunk = pr->u.p.parm1;
        break;
      case kmp_sch_dynamic_chunked:
        schedtype = 1;
        break;
      case kmp_sch_guided_iterative_chunked:
      case kmp_sch_guided_analytical_chunked:
      case kmp_sch_guided_simd:
        schedtype = 2;
        break;
      default:
        schedtype = 3;
        break;
      }
      __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
    }
  }

  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
        "lb:%%%s ub:%%%s"
        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
                  pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->u.p.ordered_lower,
                  pr->u.p.ordered_upper, pr->u.p.parm1, pr->u.p.parm2,
                  pr->u.p.parm3, pr->u.p.parm4));
    __kmp_str_free(&buff);
  }
#if (KMP_STATIC_STEAL_ENABLED)
  // Bumping 'static_steal_counter' signals other threads that this descriptor
  // now belongs to the current loop instance and may be stolen from.
  if (schedule == kmp_sch_static_steal) {
    volatile T *p = &pr->u.p.static_steal_counter;
    *p = *p + 1;
  }
#endif // ( KMP_STATIC_STEAL_ENABLED )

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    kmp_info_t *thr = __kmp_threads[gtid];
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {

    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      UT lower = pr->u.p.ordered_lower;

      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }

      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
      KMP_MB();

      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }

      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}
#ifdef KMP_GOMP_COMPAT

template <typename UT>
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1;

    if (pr->ordered_bumped == inc) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      inc -= pr->ordered_bumped;

      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }

      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KMP_MB();
      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;

      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }

      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}

#endif /* KMP_GOMP_COMPAT */
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_END                                                          \
  if (status == 0) {                                                           \
    if (ompt_enabled.ompt_callback_work) {                                     \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
      ompt_callbacks.ompt_callback(ompt_callback_work)(                        \
          ompt_work_loop, ompt_scope_end, &(team_info->parallel_data),         \
          &(task_info->task_data), 0, codeptr);                                \
    }                                                                          \
  }
#else
#define OMPT_LOOP_END // no-op
#endif

template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
                               T *p_lb, T *p_ub,
                               typename traits_t<T>::signed_t *p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                               ,
                               void *codeptr
#endif
                               ) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;

  KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);

  int status;
  dispatch_private_info_template<T> *pr;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;

  KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // these cannot be NULL

  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_next: T#%%d called p_lb:%%%s "
                            "p_ub:%%%s p_st:%%%s p_last: %%p\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec);
    KD_TRACE(1000, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last));
    __kmp_str_free(&buff);
  }
  if (team->t.t_serialized) {
    /* NOTE: serialize this dispatch because we are not at the active level */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
    KMP_DEBUG_ASSERT(pr);

    if ((status = (pr->u.p.tc != 0)) == 0) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }
    } else if (pr->nomerge) {
      kmp_int32 last;
      T start;
      UT limit, trip, init;
      ST incr;
      T chunk = pr->u.p.parm1;

      KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                     gtid));

      init = chunk * pr->u.p.count++;
      trip = pr->u.p.tc - 1;

      if ((status = (init <= trip)) == 0) {
        *p_lb = 0;
        *p_ub = 0;
        if (p_st != NULL)
          *p_st = 0;
        if (__kmp_env_consistency_check) {
          if (pr->pushed_ws != ct_none) {
            pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
          }
        }
      } else {
        start = pr->u.p.lb;
        limit = chunk + init - 1;
        incr = pr->u.p.st;

        if ((last = (limit >= trip)) != 0) {
          limit = trip;
#if KMP_OS_WINDOWS
          pr->u.p.last_upper = pr->u.p.ub;
#endif
        }
        if (p_last != NULL)
          *p_last = last;
        if (p_st != NULL)
          *p_st = incr;
        if (incr == 1) {
          *p_lb = start + init;
          *p_ub = start + limit;
        } else {
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
        }

        if (pr->ordered) {
          pr->u.p.ordered_lower = init;
          pr->u.p.ordered_upper = limit;
          {
            char *buff;
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
        } // if
      } // if
    } else {
      pr->u.p.tc = 0;
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
      pr->u.p.last_upper = *p_ub;
#endif
      if (p_last != NULL)
        *p_last = TRUE;
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    } // if

    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format(
          "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
          "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
          traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
      KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status));
      __kmp_str_free(&buff);
    }
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
  } else {
    dispatch_shared_info_template<UT> *sh;
    kmp_int32 last;
    T start;
    ST incr;
    UT limit, trip, init;

    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    KMP_DEBUG_ASSERT(pr);
    sh = reinterpret_cast<dispatch_shared_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(sh);

    if (pr->u.p.tc == 0) {
      // zero trip count
      status = 0;
    } else {
      switch (pr->schedule) {
#if (KMP_STATIC_STEAL_ENABLED)
      case kmp_sch_static_steal: {
        T chunk = pr->u.p.parm1;
        int nproc = th->th.th_team_nproc;

        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n",
                       gtid));

        trip = pr->u.p.tc - 1;

        if (traits_t<T>::type_size > 4) {
          // 8-byte induction variable: use the per-thread lock allocated in
          // __kmp_dispatch_init(); the 4-byte case below uses a packed CAS.
          kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock;
          KMP_DEBUG_ASSERT(lck != NULL);
          if (pr->u.p.count < (UT)pr->u.p.ub) {
            __kmp_acquire_lock(lck, gtid);
            // try to get own chunk of iterations
            init = (pr->u.p.count)++;
            status = (init < (UT)pr->u.p.ub);
            __kmp_release_lock(lck, gtid);
          } else {
            status = 0; // no own chunks left
          }
          if (!status) { // try to steal
            kmp_info_t **other_threads = team->t.t_threads;
            int while_limit = nproc; // nproc attempts to find a victim
            int while_index = 0;
            while ((!status) && (while_limit != ++while_index)) {
              T remaining;
              T victimIdx = pr->u.p.parm4;
              T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
              dispatch_private_info_template<T> *victim =
                  reinterpret_cast<dispatch_private_info_template<T> *>(
                      other_threads[victimIdx]
                          ->th.th_dispatch->th_dispatch_pr_current);
              while ((victim == NULL || victim == pr ||
                      (*(volatile T *)&victim->u.p.static_steal_counter !=
                       *(volatile T *)&pr->u.p.static_steal_counter)) &&
                     oldVictimIdx != victimIdx) {
                victimIdx = (victimIdx + 1) % nproc;
                victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                    other_threads[victimIdx]
                        ->th.th_dispatch->th_dispatch_pr_current);
              }
              if (!victim ||
                  (*(volatile T *)&victim->u.p.static_steal_counter !=
                   *(volatile T *)&pr->u.p.static_steal_counter)) {
                continue; // try once more (nproc attempts in total)
              }
              if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
                pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
                continue; // not enough chunks to steal, goto next victim
              }

              lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
              KMP_ASSERT(lck != NULL);
              __kmp_acquire_lock(lck, gtid);
              limit = victim->u.p.ub; // keep initial ub
              if (victim->u.p.count >= limit ||
                  (remaining = limit - victim->u.p.count) < 2) {
                __kmp_release_lock(lck, gtid);
                pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
                continue; // not enough chunks to steal
              }
              // steal 1/4 of the remaining chunks, or 1 of 2-3 remaining
              if (remaining > 3) {
                init = (victim->u.p.ub -= (remaining >> 2));
              } else {
                init = (victim->u.p.ub -= 1);
              }
              __kmp_release_lock(lck, gtid);

              KMP_DEBUG_ASSERT(init + 1 <= limit);
              pr->u.p.parm4 = victimIdx; // remember the victim
              status = 1;
              while_index = 0;
              // now update own count and ub with the stolen range
              __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
              pr->u.p.count = init + 1;
              pr->u.p.ub = limit;
              __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
            } // while (search for victim)
          } // if (try to find victim and steal)
        } else {
          // 4-byte induction variable: use one 8-byte CAS on the packed
          // (count, ub) pair so both fields are updated atomically together
          typedef union {
            struct {
              UT count;
              T ub;
            } p;
            kmp_int64 b;
          } union_i4;
          {
            union_i4 vold, vnew;
            vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
            vnew = vold;
            vnew.p.count++;
            while (!KMP_COMPARE_AND_STORE_ACQ64(
                (volatile kmp_int64 *)&pr->u.p.count,
                *VOLATILE_CAST(kmp_int64 *) & vold.b,
                *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
              KMP_CPU_PAUSE();
              vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
              vnew = vold;
              vnew.p.count++;
            }
            vnew = vold;
            init = vnew.p.count;
            status = (init < (UT)vnew.p.ub);
          }

          if (!status) {
            kmp_info_t **other_threads = team->t.t_threads;
            int while_limit = nproc; // nproc attempts to find a victim
            int while_index = 0;

            while ((!status) && (while_limit != ++while_index)) {
              union_i4 vold, vnew;
              kmp_int32 remaining;
              T victimIdx = pr->u.p.parm4;
              T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
              dispatch_private_info_template<T> *victim =
                  reinterpret_cast<dispatch_private_info_template<T> *>(
                      other_threads[victimIdx]
                          ->th.th_dispatch->th_dispatch_pr_current);
              while ((victim == NULL || victim == pr ||
                      (*(volatile T *)&victim->u.p.static_steal_counter !=
                       *(volatile T *)&pr->u.p.static_steal_counter)) &&
                     oldVictimIdx != victimIdx) {
                victimIdx = (victimIdx + 1) % nproc;
                victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                    other_threads[victimIdx]
                        ->th.th_dispatch->th_dispatch_pr_current);
              }
              if (!victim ||
                  (*(volatile T *)&victim->u.p.static_steal_counter !=
                   *(volatile T *)&pr->u.p.static_steal_counter)) {
                continue; // try once more (nproc attempts in total)
              }
              pr->u.p.parm4 = victimIdx; // new victim found
              while (1) { // CAS loop if victim has enough chunks to steal
                vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
                vnew = vold;

                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                if (vnew.p.count >= (UT)vnew.p.ub ||
                    (remaining = vnew.p.ub - vnew.p.count) < 2) {
                  pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
                  break; // not enough chunks to steal, goto next victim
                }
                if (remaining > 3) {
                  vnew.p.ub -= (remaining >> 2); // steal 1/4 of remaining
                } else {
                  vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
                }
                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                if (KMP_COMPARE_AND_STORE_ACQ64(
                        (volatile kmp_int64 *)&victim->u.p.count,
                        *VOLATILE_CAST(kmp_int64 *) & vold.b,
                        *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
                  // stealing succeeded
                  status = 1;
                  while_index = 0;
                  // now update own count and ub
                  init = vnew.p.ub;
                  vold.p.count = init + 1;
#if KMP_ARCH_X86
                  KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count),
                                   vold.b);
#else
                  *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
#endif
                  break;
                } // if (check CAS result)
                KMP_CPU_PAUSE(); // CAS failed, repeat the attempt
              } // while (try to steal from particular victim)
            } // while (search for victim)
          } // if (try to find victim and steal)
        } // if (4-byte induction variable)
        if (!status) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.parm2;
          init *= chunk;
          limit = chunk + init - 1;
          incr = pr->u.p.st;

          KMP_DEBUG_ASSERT(init <= trip);
          if ((last = (limit >= trip)) != 0)
            limit = trip;
          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              // create format specifiers before the debug output
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
        break;
      } // case
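      // Illustration (not part of the original source): the 4-byte path above
      // packs (count, ub) into one 64-bit word (union_i4) so that claiming the
      // next chunk (count++) and shrinking the range during a steal (ub -= ...)
      // are both single KMP_COMPARE_AND_STORE_ACQ64 operations; a thread first
      // drains its own [count, ub) range and then visits victims starting at
      // parm4, taking a quarter of a victim's remaining chunks per successful
      // steal.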
#endif // ( KMP_STATIC_STEAL_ENABLED )
      case kmp_sch_static_balanced: {
        KD_TRACE(
            100,
            ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid));
        if ((status = !pr->u.p.count) !=
            0) { /* check if thread has any iterations to do */
          pr->u.p.count = 1;
          *p_lb = pr->u.p.lb;
          *p_ub = pr->u.p.ub;
          last = pr->u.p.parm1;
          if (p_st != NULL)
            *p_st = pr->u.p.st;
        } else { /* no iterations to do */
          pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
        }
        if (pr->ordered) {
          {
            char *buff;
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
        } // if
      } // case
      break;
      case kmp_sch_static_greedy: /* original static_greedy code merged here */
      case kmp_sch_static_chunked: {
        T parm1;

        KD_TRACE(100, ("__kmp_dispatch_next: T#%d "
                       "kmp_sch_static_[affinity|chunked] case\n",
                       gtid));
        parm1 = pr->u.p.parm1;

        trip = pr->u.p.tc - 1;
        init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

        if ((status = (init <= trip)) != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          limit = parm1 + init - 1;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          pr->u.p.count += th->th.th_team_nproc;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
      } // case
      break;
      case kmp_sch_dynamic_chunked: {
        T chunk = pr->u.p.parm1;

        KD_TRACE(
            100,
            ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid));

        init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
        trip = pr->u.p.tc - 1;

        if ((status = (init <= trip)) == 0) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.lb;
          limit = chunk + init - 1;
          incr = pr->u.p.st;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
      } // case
      break;
      case kmp_sch_guided_iterative_chunked: {
        T chunkspec = pr->u.p.parm1;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked "
                       "iterative case\n",
                       gtid));
        trip = pr->u.p.tc;
        // Start atomic part of computations
        while (1) {
          ST remaining; // signed, because it can be < 0
          init = sh->u.s.iteration; // shared value
          remaining = trip - init;
          if (remaining <= 0) { // nothing to do, don't try atomic op
            status = 0;
            break;
          }
          if ((T)remaining < pr->u.p.parm2) {
            // use dynamic-style schedule: atomically increment iterations,
            // get the old value
            init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                     (ST)chunkspec);
            remaining = trip - init;
            if (remaining <= 0) {
              status = 0; // all iterations got by other threads
            } else { // got some iterations to work on
              status = 1;
              if ((T)remaining > chunkspec) {
                limit = init + chunkspec - 1;
              } else {
                last = 1; // the last chunk
                limit = init + remaining - 1;
              } // if
            } // if
            break;
          } // if
          limit = init + (UT)(remaining *
                              *(double *)&pr->u.p.parm3); // divide by K*nproc
          if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                   (ST)init, (ST)limit)) {
            // CAS was successful, chunk obtained
            status = 1;
            --limit;
            break;
          } // if
        } // while
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
      } // case
      break;
      case kmp_sch_guided_simd: {
        // same as iterative, but current chunk adjusted to be a multiple of
        // the given chunk
        T chunk = pr->u.p.parm1;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_simd case\n",
                       gtid));
        trip = pr->u.p.tc;
        // Start atomic part of computations
        while (1) {
          ST remaining; // signed, because it can be < 0
          init = sh->u.s.iteration; // shared value
          remaining = trip - init;
          if (remaining <= 0) { // nothing to do, don't try atomic op
            status = 0;
            break;
          }
          KMP_DEBUG_ASSERT(init % chunk == 0);
          // compare with K*nproc*(chunk+1), K=2 by default
          if ((T)remaining < pr->u.p.parm2) {
            // use dynamic-style schedule: atomically increment iterations,
            // get the old value
            init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                     (ST)chunk);
            remaining = trip - init;
            if (remaining <= 0) {
              status = 0; // all iterations got by other threads
            } else { // got some iterations to work on
              status = 1;
              if ((T)remaining > chunk) {
                limit = init + chunk - 1;
              } else {
                last = 1; // the last chunk
                limit = init + remaining - 1;
              } // if
            } // if
            break;
          } // if
          // divide by K*nproc
          UT span = remaining * (*(double *)&pr->u.p.parm3);
          UT rem = span % chunk;
          if (rem) // adjust so that span%chunk == 0
            span += chunk - rem;
          limit = init + span;
          if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                   (ST)init, (ST)limit)) {
            // CAS was successful, chunk obtained
            status = 1;
            --limit;
            break;
          } // if
        } // while
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
      } // case
      break;
      case kmp_sch_guided_analytical_chunked: {
        T chunkspec = pr->u.p.parm1;
        UT chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* for storing original FPCW value for Windows* OS on IA-32 */
        unsigned int oldFpcw;
        unsigned int fpcwSet = 0;
#endif
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked "
                       "analytical case\n",
                       gtid));

        trip = pr->u.p.tc;

        KMP_DEBUG_ASSERT(th->th.th_team_nproc > 1);
        KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)th->th.th_team_nproc <
                         trip);

        while (1) { /* guards against unexpected zero chunk sizes */
          chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
          if (chunkIdx >= (UT)pr->u.p.parm2) {
            --trip;
            /* use dynamic-style scheduling */
            init = chunkIdx * chunkspec + pr->u.p.count;
            /* need to verify init > 0 in case of overflow above */
            if ((status = (init > 0 && init <= trip)) != 0) {
              limit = init + chunkspec - 1;

              if ((last = (limit >= trip)) != 0)
                limit = trip;
            }
            break;
          } else {
/* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
            /* save original FPCW and set precision to 64-bit if needed */
            if (!fpcwSet) {
              oldFpcw = _control87(0, 0);
              _control87(_PC_64, _MCW_PC);
              fpcwSet = 0x30000;
            }
#endif
            if (chunkIdx) {
              init = __kmp_dispatch_guided_remaining<T>(
                  trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
              KMP_DEBUG_ASSERT(init);
              init = trip - init;
            } else
              init = 0;
            limit = trip - __kmp_dispatch_guided_remaining<T>(
                               trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
            KMP_ASSERT(init <= limit);
            if (init < limit) {
              KMP_DEBUG_ASSERT(limit <= trip);
              --limit;
              status = 1;
              break;
            } // if
          } // if
        } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* restore FPCW if necessary (check fpcwSet first because oldFpcw may
           be uninitialized here) */
        if (fpcwSet && (oldFpcw & fpcwSet))
          _control87(oldFpcw, _MCW_PC);
#endif
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
      } // case
      break;
      case kmp_sch_trapezoidal: {
        UT index;
        T parm2 = pr->u.p.parm2;
        T parm3 = pr->u.p.parm3;
        T parm4 = pr->u.p.parm4;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                       gtid));

        index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);

        init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
        trip = pr->u.p.tc - 1;

        if ((status = ((T)index < parm3 && init <= trip)) == 0) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.lb;
          limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
          incr = pr->u.p.st;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
      } // case
      break;
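      // Illustration (not part of the original source): chunk `index` covers
      // iterations [index*(2*parm2 - (index-1)*parm4)/2,
      //             (index+1)*(2*parm2 - index*parm4)/2 - 1], i.e. the partial
      // sums of the arithmetic series parm2, parm2 - parm4, parm2 - 2*parm4,
      // ...  With parm2 = 125 and parm4 = 8 (the __kmp_dispatch_init example),
      // chunk 0 is [0,124], chunk 1 is [125,241], chunk 2 is [242,350], and so
      // on until parm3 chunks have been handed out.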
      default: {
        status = 0; // to avoid complaints on uninitialized variable use
        __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
                    KMP_HNT(GetNewerLibrary), // Hint
                    __kmp_msg_null // Variadic argument list terminator
                    );
      } break;
      } // switch
    } // if (pr->u.p.tc == 0)

    if (status == 0) {
      UT num_done;

      num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done);
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
            traits_t<UT>::spec);
        KD_TRACE(100, (buff, gtid, sh->u.s.num_done));
        __kmp_str_free(&buff);
      }

      if ((ST)num_done == th->th.th_team_nproc - 1) {
#if (KMP_STATIC_STEAL_ENABLED)
        if (pr->schedule == kmp_sch_static_steal &&
            traits_t<T>::type_size > 4) {
          int i;
          kmp_info_t **other_threads = team->t.t_threads;
          // loop complete, safe to destroy locks used for stealing
          for (i = 0; i < th->th.th_team_nproc; ++i) {
            kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock;
            KMP_ASSERT(lck != NULL);
            __kmp_destroy_lock(lck);
            __kmp_free(lck);
            other_threads[i]->th.th_dispatch->th_steal_lock = NULL;
          }
        }
#endif
        /* NOTE: release this buffer to be reused */
        KMP_MB(); /* Flush all pending memory write invalidates. */

        sh->u.s.num_done = 0;
        sh->u.s.iteration = 0;

        /* TODO replace with general release procedure? */
        if (pr->ordered) {
          sh->u.s.ordered_iteration = 0;
        }

        KMP_MB(); /* Flush all pending memory write invalidates. */

        sh->buffer_index += __kmp_dispatch_num_buffers;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                       gtid, sh->buffer_index));

        KMP_MB(); /* Flush all pending memory write invalidates. */
      } // if (last thread)

      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }

      th->th.th_dispatch->th_deo_fcn = NULL;
      th->th.th_dispatch->th_dxo_fcn = NULL;
      th->th.th_dispatch->th_dispatch_sh_current = NULL;
      th->th.th_dispatch->th_dispatch_pr_current = NULL;
    } // if (status == 0)
#if KMP_OS_WINDOWS
    else if (last) {
      pr->u.p.last_upper = pr->u.p.ub;
    }
#endif /* KMP_OS_WINDOWS */
    if (p_last != NULL && status != 0)
      *p_last = last;
  } // if (!team->t.t_serialized)

  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d normal case: "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status));
    __kmp_str_free(&buff);
  }
#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_NEXT();
#endif
  OMPT_LOOP_END;
  return status;
}
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
                                  typename traits_t<T>::signed_t incr) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper);
  KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
    __kmp_str_free(&buff);
  }

  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal: bounds and increment disagree in sign.
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper - lower can exceed the limit of the signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only some teams get a single iteration, the others get nothing
    if (team_id < trip_count) {
      *pupper = *plower = *plower + team_id * incr;
    } else {
      *plower = *pupper + incr; // zero-trip loop
    }
    if (plastiter != NULL)
      *plastiter = (team_id == trip_count - 1);
  } else {
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunk = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunk + (team_id < extras ? team_id : extras));
      *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
      // check/correct bounds if needed
      if (incr > 0) {
        if (*pupper < *plower)
          *pupper = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupper > upper - incr;
        if (*pupper > upper)
          *pupper = upper; // tracker C73258
      } else {
        if (*pupper > *plower)
          *pupper = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupper < upper - incr;
        if (*pupper < upper)
          *pupper = upper; // tracker C73258
      }
    }
  }
}
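// Illustration (not part of the original source): for a distribute loop over
// 0..999 with incr = 1 across nteams = 4 teams under the default
// kmp_sch_static_balanced policy, trip_count = 1000, chunk = 250, extras = 0,
// so team_id k receives [250*k, 250*k + 249] and only team 3 sets *plastiter.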
// OpenMP API entry points for dynamic loop dispatch (32/64-bit,
// signed/unsigned induction variables).

void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                 kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                 kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                        );
}

int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                            kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                         );
}

int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                        );
}

int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                            kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                         );
}
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}

void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value == checker;
}

kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value != checker;
}

kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
  return value < checker;
}

kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
  return value >= checker;
}

kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
  return value <= checker;
}
kmp_uint32
__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
                   kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
                   void *obj // Higher-level synchronization object, or NULL.
                   ) {
  // note: we may not belong to a team at this point
  volatile kmp_uint32 *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
  kmp_uint32 r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = TCR_4(*spin), check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
void __kmp_wait_yield_4_ptr(
    void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
    void *obj // Higher-level synchronization object, or NULL.
    ) {
  // note: we may not belong to a team at this point
  void *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(void *, kmp_uint32) = pred;

  KMP_FSYNC_SPIN_INIT(obj, spin);
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    /* if we have waited a bit, or are oversubscribed, yield;
       pause is in the following code */
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
}
#ifdef KMP_GOMP_COMPAT

void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

#endif /* KMP_GOMP_COMPAT */