19 #include "kmp_wait_release.h" 20 #include "kmp_stats.h" 23 #include "ompt-specific.h" 26 #include "tsan_annotations.h" 33 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
34 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
35 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
38 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#ifdef BUILD_TIED_TASK_STACK

//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//  from top to bottom.
static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data,
                        int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top);

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping for this entry
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}
static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_thread_from_gtid( gtid );  // needed to free extra blocks
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}
//  __kmp_push_task_stack: push the tied task onto the task stack,
//  growing the stack by another block if necessary.
static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block -> sb_prev   = stack_block;
            new_block -> sb_next   = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
//  __kmp_pop_task_stack: pop the tied task from the task stack; don't return it,
//  just check that it matches the ending task passed in.
static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (task_stack -> ts_top);

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
#endif /* BUILD_TIED_TASK_STACK */
static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                      gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head, thread_data->td.td_deque_tail) );

    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );

    return TASK_SUCCESSFULLY_PUSHED;
}
void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}
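
//  __kmp_push_current_task_to_thread: set up the implicit task of the given
//  team slot as the thread's current task, re-parenting it as needed.
//  (Summary inferred from the routine below.)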
void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                   "parent_task=%p\n",
                   tid, this_thr, this_thr->th.th_current_task,
                   team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                   "parent_task=%p\n",
                   tid, this_thr, this_thr->th.th_current_task,
                   team->t.t_implicit_task_taskdata[tid].td_parent ) );
}
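
//  __kmp_task_start: bookkeeping for a task starting execution: mark the
//  previous task as suspended, make the new task current for the thread, and
//  (when OMPT is enabled) report task-begin and any task dependences.
//  (Summary inferred from the routine below.)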
static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependences */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
}
//  __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                      gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );
    return;
}
#ifdef TASK_UNUSED
//  __kmpc_omp_task_begin: report that a given task has started execution
void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED

//  __kmp_free_task: free the current task space and the space for shareds
static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    ANNOTATE_HAPPENS_BEFORE(taskdata);
    // deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
    __kmp_fast_free( thread, taskdata );
#else /* ! USE_FAST_MEMORY */
    __kmp_thread_free( thread, taskdata );
#endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}
//  __kmp_free_task_and_ancestors: free the current task and ancestors without children
static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
#if OMP_45_ENABLED
    // Proxy tasks must always be allowed to free their parents
    // because they can be run in background even in serial mode.
    kmp_int32 team_serial = ( taskdata->td_flags.team_serial ||
                              taskdata->td_flags.tasking_ser ) && !taskdata->td_flags.proxy;
#else
    kmp_int32 team_serial = taskdata->td_flags.team_serial ||
                            taskdata->td_flags.tasking_ser;
#endif
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or in serialized teams
        // to avoid premature deallocation of ancestors.
        if ( team_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_task_team_t * task_team = thread->th.th_task_team;
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                      gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task; // restore current_task
            resumed_task->td_flags.executing = 1;      // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
#if OMP_45_ENABLED
    }
    // if we found proxy tasks there could exist a dependency chain with the proxy task as origin
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) {
#endif
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler. */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else
#if OMP_45_ENABLED
        if ( !(task_team && task_team->tt.tt_found_proxy_tasks) )
#endif
        {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first, so an asynchronous inquiry does not see the freed task as current.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );
    return;
}
//  __kmpc_omp_task_complete_if0: report that a task has completed execution
void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#ifdef TASK_UNUSED
//  __kmpc_omp_task_complete: report that a task has completed execution
void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED

#if OMPT_SUPPORT
//  __kmp_task_init_ompt: initialize OMPT fields maintained by a task
static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif /* OMPT_SUPPORT */
//  __kmp_init_implicit_task: initialize the appropriate fields in the implicit task for a given thread
void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team,
                          int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id = KMP_GEN_TASK_ID();
    task->td_team    = team;
    task->td_ident   = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_45_ENABLED
    task->td_flags.proxy    = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started   = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete  = 0;
    task->td_flags.freed     = 0;

#if OMP_40_ENABLED
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used: don't need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
        task->td_dephash   = NULL;
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, team, task ) );
}
// Release resources associated with the implicit task at the end of a parallel region
void
__kmp_finish_implicit_task(kmp_info_t *thread)
{
    kmp_taskdata_t *task = thread->th.th_current_task;
    if (task->td_dephash)
        __kmp_dephash_free_entries(thread, task->td_dephash);
}

// Release resources associated with the implicit task when it is destroyed
void
__kmp_free_implicit_task(kmp_info_t *thread)
{
    kmp_taskdata_t *task = thread->th.th_current_task;
    if (task->td_dephash)
        __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
}
// Round up a size to a power of two specified by val: used to insert padding
// between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }
    }
    return size;
} // __kmp_round_up_to_val
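
//  __kmp_task_alloc: allocate and initialize the kmp_taskdata_t / kmp_task_t
//  pair for a new explicit task: compute the padded size, allocate taskdata
//  plus shareds in one block, copy ICVs from the parent, set the task flags,
//  and register the child with its parent and taskgroup.
//  (Summary inferred from the routine below.)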
kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec? we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized:
               set up a task team and propagate it to the thread */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
#else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
#endif /* USE_FAST_MEMORY */
    ANNOTATE_HAPPENS_AFTER(taskdata);

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_45_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness   = flags->tiedness;
    taskdata->td_flags.final      = flags->final;
    taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
    taskdata->td_flags.proxy = flags->proxy;
    taskdata->td_task_team   = thread->th.th_task_team;
    taskdata->td_size_alloc  = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype = TASK_EXPLICIT;

    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // Serialize the task if the team is serialized, or immediate execution was requested.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
        || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started   = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.complete  = 0;
    taskdata->td_flags.freed     = 0;

    taskdata->td_flags.native = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because it counts the current task and its children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized, or if it is a proxy task
#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );
    ANNOTATE_HAPPENS_BEFORE(task);

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}
kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    kmp_uint64 cur_time;

    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);

#if OMP_45_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if(__kmp_forkjoin_frames_mode == 3) {
        // Get the current time stamp to measure task execution time to correct barrier imbalance time
        cur_time = __itt_get_timestamp();
    }
#endif

#if OMP_45_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY ) {
#endif
        ANNOTATE_HAPPENS_AFTER(task);
        __kmp_task_start( gtid, task, current_task );
#if OMP_45_ENABLED
    }
#endif

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the thread's state and restore it after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            // this task belongs to a taskgroup or team that has been cancelled
            discard = 1 /* true */;
        }
    }

    // Invoke the task routine and pass in relevant data.
    if (! discard) {
#if KMP_STATS_ENABLED
        switch(KMP_GET_THREAD_STATE()) {
         case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);  break;
         case PLAIN_BARRIER:     KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
         case TASKYIELD:         KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
         case TASKWAIT:          KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);  break;
         case TASKGROUP:         KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
         default:                KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
        }
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                current_task->ompt_task_info.task_id,
                taskdata->ompt_task_info.task_id);
        }
#endif

#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
        KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                taskdata->ompt_task_info.task_id,
                current_task->ompt_task_info.task_id);
        }
#endif

#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
    }
#endif

#if OMP_45_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY ) {
#endif
        ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
        __kmp_task_finish( gtid, task, current_task );
#if OMP_45_ENABLED
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - correct arrive time after the task finished
    if(__kmp_forkjoin_frames_mode == 3) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        if(this_thr->th.th_bar_arrive_time) {
            this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
        }
    }
#endif
    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}
kmp_int32
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    /* Should we execute the new task or queue it? For now, let's just always try to
       queue it. If the queue fills up, then we'll execute it. */

    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        new_taskdata->td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                  new_taskdata ) );

    ANNOTATE_HAPPENS_BEFORE(new_task);
    return TASK_CURRENT_NOT_QUEUED;
}
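
//  __kmp_omp_task: schedule a non-thread-switchable task for execution: try to
//  push it onto the current thread's deque and, if that fails (or the task is
//  a proxy task), invoke it immediately. (Summary inferred from the routine below.)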
static kmp_int32
__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
            __builtin_frame_address(1);
    }
#endif

#if OMP_45_ENABLED
    if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#else
    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#endif
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        if ( serialize_immediate )
            new_taskdata -> td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    ANNOTATE_HAPPENS_BEFORE(new_task);
    return TASK_CURRENT_NOT_QUEUED;
}
kmp_int32
__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_int32 res;
    KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    res = __kmp_omp_task(gtid,new_task,true);

    KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );
    return res;
}
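
//  __kmpc_omp_taskwait: wait until all tasks generated by the current task are
//  complete, executing queued tasks in the meantime.
//  (Summary inferred from the routine below.)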
kmp_int32
__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
{
    kmp_taskdata_t * taskdata;
    kmp_info_t * thread;
    int thread_finished = FALSE;
    KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

    KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        thread = __kmp_threads[ gtid ];
        taskdata = thread -> th.th_current_task;

#if OMPT_SUPPORT && OMPT_TRACE
        ompt_task_id_t my_task_id;
        ompt_parallel_id_t my_parallel_id;

        if (ompt_enabled) {
            kmp_team_t *team = thread->th.th_team;
            my_task_id = taskdata->ompt_task_info.task_id;
            my_parallel_id = team->t.ompt_team_info.parallel_id;

            taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
            if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
                    my_parallel_id, my_task_id);
            }
        }
#endif

        // Debugger: the taskwait is active; store location and the thread that encountered it
        taskdata->td_taskwait_counter += 1;
        taskdata->td_taskwait_ident    = loc_ref;
        taskdata->td_taskwait_thread   = gtid + 1;

#if USE_ITT_BUILD
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        bool must_wait = ! taskdata->td_flags.team_serial && ! taskdata->td_flags.final;

#if OMP_45_ENABLED
        must_wait = must_wait || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks);
#endif
        if (must_wait)
        {
            kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
            while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // Debugger: the taskwait is completed; location remains, but thread id is negated
        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled) {
            if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
                ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
                    my_parallel_id, my_task_id);
            }
            taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
        }
#endif
        ANNOTATE_HAPPENS_AFTER(taskdata);
    }

    KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );

    return TASK_CURRENT_NOT_QUEUED;
}
kmp_int32
__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
{
    kmp_taskdata_t * taskdata;
    kmp_info_t * thread;
    int thread_finished = FALSE;

    KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

    KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                  gtid, loc_ref, end_part) );

    if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {

        thread = __kmp_threads[ gtid ];
        taskdata = thread -> th.th_current_task;

        // Debugger: treat a taskyield like an active taskwait
        taskdata->td_taskwait_counter += 1;
        taskdata->td_taskwait_ident    = loc_ref;
        taskdata->td_taskwait_thread   = gtid + 1;

#if USE_ITT_BUILD
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
        if ( ! taskdata->td_flags.team_serial ) {
            kmp_task_team_t * task_team = thread->th.th_task_team;
            if (task_team != NULL) {
                if (KMP_TASKING_ENABLED(task_team)) {
                    __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
                                            USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
                }
            }
        }
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // Debugger: the taskwait is completed; location remains, but thread id is negated
        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
    }

    KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );

    return TASK_CURRENT_NOT_QUEUED;
}
//  __kmpc_taskgroup: start a new taskgroup
void
__kmpc_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t    * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * tg_new =
        (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
    KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
    tg_new->count = 0;
    tg_new->cancel_request = cancel_noreq;
    tg_new->parent = taskdata->td_taskgroup;
    taskdata->td_taskgroup = tg_new;
}
//  __kmpc_end_taskgroup: wait until all tasks generated by the current task
//  and its descendants are complete
void
__kmpc_end_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t    * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
    KMP_DEBUG_ASSERT( taskgroup != NULL );
    KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
#if USE_ITT_BUILD
        // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

#if OMP_45_ENABLED
        if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
#else
        if ( ! taskdata->td_flags.team_serial )
#endif
        {
            kmp_flag_32 flag(&(taskgroup->count), 0U);
            while ( TCR_4(taskgroup->count) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }

#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
    }
    KMP_DEBUG_ASSERT( taskgroup->count == 0 );

    // Restore parent taskgroup for the current task
    taskdata->td_taskgroup = taskgroup->parent;
    __kmp_thread_free( thread, taskgroup );

    KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
    ANNOTATE_HAPPENS_AFTER(taskdata);
}
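
//  __kmp_remove_my_task: remove a task from the calling thread's own deque
//  (from the tail), honoring the task scheduling constraint when requested.
//  Returns NULL if no suitable task is available. (Summary inferred from the routine below.)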
static kmp_task_t *
__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
                      kmp_int32 is_constrained )
{
    kmp_task_t * task;
    kmp_taskdata_t * taskdata;
    kmp_thread_data_t *thread_data;
    kmp_uint32 tail;
1685 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1686 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL );
1688 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1690 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1691 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1692 thread_data->td.td_deque_tail) );
    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
        KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                      thread_data->td.td_deque_tail) );
        return NULL;
    }
1701 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                      thread_data->td.td_deque_tail) );
        return NULL;
    }
1711 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td);
1712 taskdata = thread_data -> td.td_deque[ tail ];
    if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
        // we need to check if the candidate obeys the task scheduling constraint (TSC):
        // only descendants of the current task can be scheduled
        kmp_taskdata_t * current = thread->th.th_current_task;
        kmp_int32 level = current->td_level;
        kmp_taskdata_t * parent = taskdata->td_parent;
        while ( parent != current && parent->td_level > level ) {
            parent = parent->td_parent;  // check generation up to the level of the current task
            KMP_DEBUG_ASSERT(parent != NULL);
        }
        if ( parent != current ) {
            // the tail task is not a descendant of the current task; leave it in the deque
            __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
            KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                          gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                          thread_data->td.td_deque_tail) );
            return NULL;
        }
    }
1734 thread_data -> td.td_deque_tail = tail;
1735 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1737 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1739 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1740 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1741 thread_data->td.td_deque_tail) );
    task = KMP_TASKDATA_TO_TASK( taskdata );
    return task;
}
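
//  __kmp_steal_task: attempt to steal a task from the head of a victim thread's
//  deque; updates the unfinished-threads count when a previously "finished"
//  thread becomes active again. Returns NULL if nothing could be stolen.
//  (Summary inferred from the routine below.)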
static kmp_task_t *
__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
                  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
                  kmp_int32 is_constrained )
{
    kmp_task_t * task;
    kmp_taskdata_t * taskdata;
    kmp_thread_data_t *victim_td, *threads_data;
    kmp_int32 victim_tid;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1765 threads_data = task_team -> tt.tt_threads_data;
1766 KMP_DEBUG_ASSERT( threads_data != NULL );
1768 victim_tid = victim->th.th_info.ds.ds_tid;
1769 victim_td = & threads_data[ victim_tid ];
    KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
                  "head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );

    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||   // Cannot steal from an empty deque.
         (TCR_PTR(victim->th.th_task_team) != task_team))
    {
        KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
                      "ntasks=%d head=%u tail=%u\n",
                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
        return NULL;
    }
    __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );

    // Check again after we acquire the lock
    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
         (TCR_PTR(victim->th.th_task_team) != task_team))
    {
        __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
        KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
                      "ntasks=%d head=%u tail=%u\n",
                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
        return NULL;
    }
1800 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1802 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
    if ( is_constrained ) {
        // we need to check if the candidate obeys the task scheduling constraint (TSC):
        // only descendants of the current task can be scheduled
        kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
        kmp_int32 level = current->td_level;
        kmp_taskdata_t * parent = taskdata->td_parent;
        while ( parent != current && parent->td_level > level ) {
            parent = parent->td_parent;  // check generation up to the level of the current task
            KMP_DEBUG_ASSERT(parent != NULL);
        }
        if ( parent != current ) {
            // the head task is not a descendant of the current task; do not steal it
            __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
            KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
                          "ntasks=%d head=%u tail=%u\n",
                          gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
                          task_team, victim_td->td.td_deque_ntasks,
                          victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
            return NULL;
        }
    }
    // Bump head pointer and wrap.
    victim_td->td.td_deque_head = (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
    if (*thread_finished) {
        // We need to un-mark this victim as a finished victim.  This must be done before
        // releasing the lock, or else other threads might be released from the barrier prematurely.
        kmp_uint32 count;

        count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );

        KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
                      gtid, count + 1, task_team) );

        *thread_finished = FALSE;
    }
1841 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1843 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1846 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " 1847 "ntasks=%d head=%u tail=%u\n",
1848 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1849 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1850 victim_td->td.td_deque_tail) );
    task = KMP_TASKDATA_TO_TASK( taskdata );
    return task;
}
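
//  __kmp_execute_tasks_template: the main task-scheduling loop used by barriers,
//  taskwait and taskgroup: execute tasks from the thread's own deque, then try
//  to steal from other threads, until the spin condition in 'flag' is satisfied.
//  (Summary inferred from the routine below.)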
template <class C>
static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
                                               int *thread_finished
                                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    kmp_task_team_t *     task_team = thread->th.th_task_team;
    kmp_thread_data_t *   threads_data;
    kmp_task_t *          task;
    kmp_info_t *          other_thread;
    kmp_taskdata_t *      current_task = thread -> th.th_current_task;
    volatile kmp_uint32 * unfinished_threads;
    kmp_int32             nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
1879 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1880 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
    if (task_team == NULL)
        return FALSE;
1884 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1885 gtid, final_spin, *thread_finished) );
1887 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1888 KMP_DEBUG_ASSERT( threads_data != NULL );
1890 nthreads = task_team -> tt.tt_nproc;
1891 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
#if OMP_45_ENABLED
    KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks); // if team minus master still 0, tasking is off
#else
    KMP_DEBUG_ASSERT( nthreads > 1 );
#endif
    KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
1902 if (use_own_tasks) {
1903 task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
1905 if ((task == NULL) && (nthreads > 1)) {
1910 victim = threads_data[tid].td.td_deque_last_stolen;
1912 other_thread = threads_data[victim].td.td_thr;
1917 else if (!new_victim) {
1921 victim = __kmp_get_random(thread) % (nthreads - 1);
1922 if (victim >= tid) {
1926 other_thread = threads_data[victim].td.td_thr;
1933 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1934 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1935 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
1937 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1948 task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
1951 if (threads_data[tid].td.td_deque_last_stolen != victim) {
1952 threads_data[tid].td.td_deque_last_stolen = victim;
1959 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
                if ( itt_sync_obj == NULL ) {    // we are at fork barrier where we could not get the object reliably
                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
                }
                __kmp_itt_task_starting( itt_sync_obj );
            }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
            if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */
1984 if (flag == NULL || (!final_spin && flag->done_check())) {
1985 KA_TRACE(15, (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
1988 if (thread->th.th_task_team == NULL) {
1991 KMP_YIELD( __kmp_library == library_throughput );
1993 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
1994 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
2003 if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
2010 if (! *thread_finished) {
2013 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
2014 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
2015 gtid, count, task_team) );
2016 *thread_finished = TRUE;
2023 if (flag != NULL && flag->done_check()) {
2024 KA_TRACE(15, (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
2030 if (thread->th.th_task_team == NULL) {
2031 KA_TRACE(15, (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
2043 KA_TRACE(15, (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
                               int *thread_finished
                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
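
//  __kmp_enable_tasking: allocate (or reuse) the per-thread task deques for the
//  task team and wake any threads sleeping at a barrier so they can steal tasks.
//  (Summary inferred from the routine below.)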
static void
__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
{
    kmp_thread_data_t *threads_data;
    int nthreads, i, is_init_thread;

    KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
                    __kmp_gtid_from_thread( this_thr ) ) );
2089 KMP_DEBUG_ASSERT(task_team != NULL);
2090 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
2092 nthreads = task_team->tt.tt_nproc;
2093 KMP_DEBUG_ASSERT(nthreads > 0);
2094 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
2097 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
    if (!is_init_thread) {
        // Some other thread already set up the array.
        KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
                        __kmp_gtid_from_thread( this_thr ) ) );
        return;
    }
2105 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2106 KMP_DEBUG_ASSERT( threads_data != NULL );
    if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
         ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
    {
        // Release any threads sleeping at the barrier, so that they can steal tasks and execute them.
        for (i = 0; i < nthreads; i++) {
            volatile void *sleep_loc;
            kmp_info_t *thread = threads_data[i].td.td_thr;

            if (i == this_thr->th.th_info.ds.ds_tid) {
                continue;
            }
            // Since we haven't locked the thread's suspend mutex lock, the possibility exists
            // that the thread is on the verge of putting itself to sleep; check its sleep location.
            if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
            {
                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                                __kmp_gtid_from_thread( this_thr ),
                                __kmp_gtid_from_thread( thread ) ) );
                __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
            }
            else {
                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                                __kmp_gtid_from_thread( this_thr ),
                                __kmp_gtid_from_thread( thread ) ) );
            }
        }
    }
    KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
                    __kmp_gtid_from_thread( this_thr ) ) );
}
2184 static kmp_task_team_t *__kmp_free_task_teams = NULL;
2186 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
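
//  __kmp_alloc_task_deque: allocate the fixed-size deque of task descriptors
//  for a thread in the task team; only the owning thread ever allocates its deque.
//  (Summary inferred from the routine below.)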
static void
__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );

    // Initialize last stolen task field to "none"
    thread_data -> td.td_deque_last_stolen = -1;

    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );

    KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
                    __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
    // Allocate space for the task deque
    thread_data -> td.td_deque = (kmp_taskdata_t **)
        __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
    thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
//  __kmp_realloc_task_deque: re-allocate a deque that has become full, doubling its size
static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
    kmp_int32 new_size = 2 * size;

    KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
                    __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );

    kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));

    kmp_int32 i, j;
    for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
        new_deque[j] = thread_data->td.td_deque[i];

    __kmp_free(thread_data->td.td_deque);

    thread_data -> td.td_deque_head = 0;
    thread_data -> td.td_deque_tail = size;
    thread_data -> td.td_deque = new_deque;
    thread_data -> td.td_deque_size = new_size;
}
static void
__kmp_free_task_deque( kmp_thread_data_t *thread_data )
{
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2258 if ( thread_data -> td.td_deque != NULL ) {
2259 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2260 __kmp_free( thread_data -> td.td_deque );
2261 thread_data -> td.td_deque = NULL;
2263 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
#ifdef BUILD_TIED_TASK_STACK
    // GEH: Figure out what to do here for td_susp_tied_tasks
    if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2268 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
    }
#endif // BUILD_TIED_TASK_STACK
}

//  __kmp_realloc_task_threads_data: allocate or enlarge the threads_data array
//  for a task team. Only the first thread to get the lock allocates or enlarges
//  the array and re-initializes its elements; that thread returns TRUE, the rest FALSE.
static int
__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
{
2286 kmp_thread_data_t ** threads_data_p;
2287 kmp_int32 nthreads, maxthreads;
2288 int is_init_thread = FALSE;
    if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
        // Already reallocated and initialized.
        return FALSE;
    }
2295 threads_data_p = & task_team -> tt.tt_threads_data;
2296 nthreads = task_team -> tt.tt_nproc;
2297 maxthreads = task_team -> tt.tt_max_threads;
2301 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2303 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2305 kmp_team_t *team = thread -> th.th_team;
2308 is_init_thread = TRUE;
2309 if ( maxthreads < nthreads ) {
2311 if ( *threads_data_p != NULL ) {
2312 kmp_thread_data_t *old_data = *threads_data_p;
2313 kmp_thread_data_t *new_data = NULL;
2315 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d reallocating " 2316 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2317 __kmp_gtid_from_thread( thread ), task_team,
2318 nthreads, maxthreads ) );
2323 new_data = (kmp_thread_data_t *)
2324 __kmp_allocate( nthreads *
sizeof(kmp_thread_data_t) );
                KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
                              (void *) old_data,
                              maxthreads * sizeof(kmp_taskdata_t *) );
#ifdef BUILD_TIED_TASK_STACK
                // GEH: Figure out if this is the right thing to do
                for (kmp_int32 i = maxthreads; i < nthreads; i++) {
                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
                }
#endif // BUILD_TIED_TASK_STACK
                // Install the new data and free the old data
                (*threads_data_p) = new_data;
2339 __kmp_free( old_data );
2342 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d allocating " 2343 "threads data for task_team %p, size = %d\n",
2344 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2348 ANNOTATE_IGNORE_WRITES_BEGIN();
2349 *threads_data_p = (kmp_thread_data_t *)
2350 __kmp_allocate( nthreads *
sizeof(kmp_thread_data_t) );
2351 ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
            // GEH: Figure out if this is the right thing to do
            for (kmp_int32 i = 0; i < nthreads; i++) {
                kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
                __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
            }
#endif // BUILD_TIED_TASK_STACK
            task_team -> tt.tt_max_threads = nthreads;
2364 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2368 for (i = 0; i < nthreads; i++) {
2369 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2370 thread_data -> td.td_thr = team -> t.t_threads[i];
2372 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2376 thread_data -> td.td_deque_last_stolen = -1;
2381 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2384 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2385 return is_init_thread;
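
//------------------------------------------------------------------------------
// __kmp_free_task_threads_data:
// Deallocates a task team's threads_data array, including any attached task deques.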
static void
__kmp_free_task_threads_data( kmp_task_team_t *task_team )
{
    __kmp_acquire_bootstrap_lock( &task_team->tt.tt_threads_lock );
    if ( task_team->tt.tt_threads_data != NULL ) {
        int i;
        for (i = 0; i < task_team->tt.tt_max_threads; i++) {
            __kmp_free_task_deque( &task_team->tt.tt_threads_data[i] );
        }
        __kmp_free( task_team->tt.tt_threads_data );
        task_team->tt.tt_threads_data = NULL;
    }
    __kmp_release_bootstrap_lock( &task_team->tt.tt_threads_lock );
}
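
//------------------------------------------------------------------------------
// __kmp_allocate_task_team:
// Allocates a task team for a particular team, taking one from the global task
// team free list if possible, and (re)initializes its fields.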
static kmp_task_team_t *
__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
{
    kmp_task_team_t *task_team = NULL;
    int nthreads;

    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
                    (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );

    if (TCR_PTR(__kmp_free_task_teams) != NULL) {
        // Take a task team from the task team free list.
        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
        if (__kmp_free_task_teams != NULL) {
            task_team = __kmp_free_task_teams;
            TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
            task_team->tt.tt_next = NULL;
        }
        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
    }

    if (task_team == NULL) {
        KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating task team for team %p\n",
                        __kmp_gtid_from_thread( thread ), team ) );
        // No recycled task team available; allocate a new one.
        task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
        __kmp_init_bootstrap_lock( &task_team->tt.tt_threads_lock );
    }

    TCW_4(task_team->tt.tt_found_tasks, FALSE);
    TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
    task_team->tt.tt_nproc = nthreads = team->t.t_nproc;

    TCW_4( task_team->tt.tt_unfinished_threads, nthreads );
    TCW_4( task_team->tt.tt_active, TRUE );

    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
                    (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team->tt.tt_unfinished_threads) );
    return task_team;
}
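
//------------------------------------------------------------------------------
// __kmp_free_task_team:
// Returns the task team associated with a specific thread to the global task
// team free list.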
void
__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
{
    KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
                    thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );

    // Put the task team back on the free list.
    __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );

    KMP_DEBUG_ASSERT( task_team->tt.tt_next == NULL );
    task_team->tt.tt_next = __kmp_free_task_teams;
    TCW_PTR(__kmp_free_task_teams, task_team);

    __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
}
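
//------------------------------------------------------------------------------
// __kmp_reap_task_teams:
// Frees all task teams on the task team free list.  Intended for library shutdown,
// when thread structures and gtids may no longer be available.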
void
__kmp_reap_task_teams( void )
{
    kmp_task_team_t *task_team;

    if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
        while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
            __kmp_free_task_teams = task_team->tt.tt_next;
            task_team->tt.tt_next = NULL;

            // Free threads_data if necessary.
            if ( task_team->tt.tt_threads_data != NULL ) {
                __kmp_free_task_threads_data( task_team );
            }
            __kmp_free( task_team );
        }
        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
    }
}
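
//------------------------------------------------------------------------------
// __kmp_wait_to_unref_task_teams:
// Some threads could still be in the fork-barrier release code, possibly trying to
// steal tasks.  Wait for each thread in the thread pool to drop its task team
// reference before the caller frees the task teams.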
void
__kmp_wait_to_unref_task_teams( void )
{
    kmp_info_t *thread;
    kmp_uint32 spins;
    int done;

    KMP_INIT_YIELD( spins );

    for (;;) {
        done = TRUE;
        for (thread = (kmp_info_t *)__kmp_thread_pool;
             thread != NULL;
             thread = thread->th.th_next_pool)
        {
#if KMP_OS_WINDOWS
            DWORD exit_val;
#endif
            if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
                KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                                __kmp_gtid_from_thread( thread ) ) );
                continue;
            }
#if KMP_OS_WINDOWS
            // A dead thread cannot drop its own reference; do it on its behalf.
            if (!__kmp_is_thread_alive(thread, &exit_val)) {
                thread->th.th_task_team = NULL;
                continue;
            }
#endif
            done = FALSE;   // this thread still references a task team

            KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
                            __kmp_gtid_from_thread( thread ) ) );

            if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
                volatile void *sleep_loc;
                // If the thread is sleeping, awaken it.
                if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
                    KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
                                    __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
                    __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
                }
            }
        }
        if (done) {
            break;
        }
        // If oversubscribed (or in throughput mode), yield before spinning again.
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
}
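
//------------------------------------------------------------------------------
// __kmp_task_team_setup:
// Called by the master thread to create task teams for the team.  A task team is
// maintained for each barrier parity so that one is always ready for the region
// after the next; an existing, inactive one is reused and reset when possible.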
void
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // If the task team for the current parity hasn't been created yet, allocate it.
    // If it already exists it is the task team currently in use and must not be touched.
    if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
                      __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
                      ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
    }

    // After threads exit the barrier release they switch (via __kmp_task_team_sync) to
    // the other parity's task team, so make sure it is allocated and properly initialized.
    if (team->t.t_nproc > 1) {
        int other_team = 1 - this_thr->th.th_task_state;
        if (team->t.t_task_team[other_team] == NULL) {   // set up the other task team as well
            team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                          ((team != NULL) ? team->t.t_id : -1), other_team ));
        }
        else {   // reuse the existing struct, resetting it if it was deactivated or resized
            kmp_task_team_t *task_team = team->t.t_task_team[other_team];
            if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
                TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
                TCW_4(task_team->tt.tt_found_tasks, FALSE);
                TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
                TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
                TCW_4(task_team->tt.tt_active, TRUE );
            }
            // If the team size has changed, the first thread to enable tasking will
            // realloc threads_data if necessary.
            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                          ((team != NULL) ? team->t.t_id : -1), other_team ));
        }
    }
}
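
//------------------------------------------------------------------------------
// __kmp_task_team_sync:
// Propagates the task team pointer from the team struct to the current thread
// after a barrier, by toggling the thread's task-state parity.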
void
__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // Toggle the th_task_state field to switch which task team this thread refers to.
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    // It is now safe to propagate the task team pointer from the team struct to the current thread.
    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
                  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
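
//------------------------------------------------------------------------------
// __kmp_task_team_wait:
// Called by the master thread at a barrier to wait for the task team's
// tt_unfinished_threads count to reach zero, then deactivate the old task team
// so that spinning workers stop referencing it.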
void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                      USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );

    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
        // Worker threads may have dropped through to the release phase but could still
        // be executing tasks.  Wait here for all tasks to complete; only the master
        // thread checks the termination condition, to avoid memory contention.
        kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));

        // Deactivate the old task team so the spinning workers stop referencing it.
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
                      "setting active to false, setting local and team's pointer to NULL\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_45_ENABLED
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
#else
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
#endif
        TCW_SYNC_4( task_team->tt.tt_active, FALSE );

        TCW_PTR(this_thr->th.th_task_team, NULL);
    }
}
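
//------------------------------------------------------------------------------
// __kmp_tasking_barrier:
// Internal routine that executes all outstanding tasks prior to a regular barrier.
// May only be called when __kmp_tasking_mode == tskm_extra_barrier.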
void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
    int flag = FALSE;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );

    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
    kmp_flag_32 spin_flag(spin, 0U);
    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
        KMP_FSYNC_SPIN_PREPARE( spin );

        if( TCR_4(__kmp_global.g.g_done) ) {
            if( __kmp_global.g.g_abort )
                __kmp_abort_thread( );
            break;
        }
        KMP_YIELD( TRUE );   // keep yielding while waiting for tasks to drain
    }
    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
}
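
//------------------------------------------------------------------------------
// __kmp_give_task:
// Puts a task into a given thread's deque if that deque exists and has space,
// growing the deque (up to the pass ratio) when it is full.  Returns whether the
// task was successfully enqueued.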
static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t * task_team = taskdata->td_task_team;

    KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );

    // If task_team is NULL something went really bad...
    KMP_DEBUG_ASSERT( task_team != NULL );

    bool result = false;
    kmp_thread_data_t * thread_data = &task_team->tt.tt_threads_data[ tid ];

    if (thread_data->td.td_deque == NULL ) {
        // There's no deque in this thread; go find another one.
        // We're guaranteed that at least one thread has a deque.
        KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
        return result;
    }

    if ( TCR_4(thread_data->td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );

        // If this deque is already bigger than the pass ratio, give another thread a chance.
        if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
            return result;

        __kmp_acquire_bootstrap_lock( &thread_data->td.td_deque_lock );
        __kmp_realloc_task_deque(thread, thread_data);
    } else {
        __kmp_acquire_bootstrap_lock( &thread_data->td.td_deque_lock );

        // Re-check under the lock; the deque may have filled up in the meantime.
        if ( TCR_4(thread_data->td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
        {
            KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );

            // If this deque is already bigger than the pass ratio, give another thread a chance.
            if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
                goto release_and_exit;

            __kmp_realloc_task_deque(thread, thread_data);
        }
    }

    // The lock is held here, and there is space in the deque.
    thread_data->td.td_deque[ thread_data->td.td_deque_tail ] = taskdata;
    // Wrap index.
    thread_data->td.td_deque_tail = ( thread_data->td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
    TCW_4(thread_data->td.td_deque_ntasks, TCR_4(thread_data->td.td_deque_ntasks) + 1);

    result = true;
    KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );

release_and_exit:
    __kmp_release_bootstrap_lock( &thread_data->td.td_deque_lock );

    return result;
}
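
/* The finish of a proxy task is divided into two pieces:
   - the top half, which can be run from a thread outside the team, and
   - the bottom half, which must be run from a thread within the team.

   To run the bottom half, the task is queued back onto one of the team's threads.
   Because the bottom half could free the task before the second part of the top half
   has run, the proxy task's own td_incomplete_child_tasks counter is used to
   synchronize the two halves: the first top half adds an imaginary child, and the
   bottom half spins until the second top half removes it. */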
static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );

    taskdata->td_flags.complete = 1;   // mark the task as completed

    if ( taskdata->td_taskgroup )
        KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );

    // Create an imaginary child for this task so the bottom half cannot release the
    // task before the second top half has completed.
    TCI_4(taskdata->td_incomplete_child_tasks);
}

static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    kmp_int32 children = 0;

    // Predecrement simulated by the "- 1" calculation.
    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_parent->td_incomplete_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Remove the imaginary child.
    TCD_4(taskdata->td_incomplete_child_tasks);
}

static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );   // the top half must have run already

    // Spin until the imaginary child created by the first top half is released by the
    // second top half, to be sure the top half has completely finished.
    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 )
        ;

    __kmp_release_deps(gtid, taskdata);
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
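
// __kmpc_proxy_task_completed: complete a proxy task from a thread that is part of
// the team that owns it (gtid identifies that thread).  Runs the top and bottom
// halves of the completion directly.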
void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);
    __kmp_second_top_half_finish_proxy(taskdata);
    __kmp_bottom_half_finish_proxy(gtid, ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
}
void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);

    // Enqueue the task so that the bottom half runs on a thread of the owning team.
    kmp_team_t * team = taskdata->td_team;
    kmp_int32 nthreads = team->t.t_nproc;
    kmp_info_t *thread;
    kmp_int32 start_k = 0;
    kmp_int32 pass = 1;
    kmp_int32 k = start_k;

    do {
        // For now, linearly scan the team for a thread that can take the task.
        thread = team->t.t_threads[k];
        k = (k+1) % nthreads;

        // We did a full pass over all the threads: allow bigger deques on the next pass.
        if ( k == start_k ) pass = pass << 1;

    } while ( !__kmp_give_task( thread, k, ptask, pass ) );

    __kmp_second_top_half_finish_proxy(taskdata);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
}
kmp_task_t *
__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
{
    kmp_task_t     *task;
    kmp_taskdata_t *taskdata;
    kmp_taskdata_t *taskdata_src;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;
    size_t task_size;

    KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
    taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
    KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL );   // the pattern task may not be a proxy task
    KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
    task_size = taskdata_src->td_size_alloc;

    // Allocate a task descriptor and copy the source task into it.
    KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
#if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
#else
    taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
#endif
    KMP_MEMCPY(taskdata, taskdata_src, task_size);

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Initialize the fields that are not covered by the copy.
    taskdata->td_task_id = KMP_GEN_TASK_ID();
    if( task->shareds != NULL ) {   // need to set up the shareds pointer
        shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
        task->shareds = &((char*)taskdata)[shareds_offset];
        KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
    }
    taskdata->td_alloc_thread = thread;
    taskdata->td_taskgroup = parent_task->td_taskgroup;   // the task inherits the taskgroup from the parent task

    // Only need to keep track of child task counts if the team is parallel and tasking is not serialized.
    if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
        // Only track allocated child tasks for explicit tasks, since implicit ones are not deallocated.
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
    }

    KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
                  thread, taskdata, taskdata->td_parent) );
#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
#endif
    return task;
}
// Routine optionally provided by the compiler to duplicate a task (destination, source,
// lastprivate flag); used to form taskloop tasks from the pattern task.
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
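
//------------------------------------------------------------------------------
// __kmp_taskloop_linear: launch the tasks of a taskloop linearly.
//   loc/gtid  source location and global thread id
//   task      pattern task with the whole iteration range
//   lb, ub    pointers to the lower/upper bound fields inside the task
//   st        loop stride
//   sched     0/1/2 for no clause / grainsize / num_tasks
//   grainsize schedule value, if specified
//   task_dup  task duplication routine provided by the compiler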
void
__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                      kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                      int sched, kmp_uint64 grainsize, void *task_dup )
{
    KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
    p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
    kmp_uint64 tc;
    kmp_uint64 lower = *lb;   // the compiler provides the global bounds here
    kmp_uint64 upper = *ub;
    kmp_uint64 i, num_tasks = 0, extras = 0;
    kmp_info_t *thread = __kmp_threads[gtid];
    kmp_taskdata_t *current_task = thread->th.th_current_task;
    kmp_task_t *next_task;
    kmp_int32 lastpriv = 0;
    size_t lower_offset = (char*)lb - (char*)task;   // offset of lb within the task structure
    size_t upper_offset = (char*)ub - (char*)task;   // offset of ub within the task structure

    // compute the trip count
    if ( st == 1 ) {          // most common case
        tc = upper - lower + 1;
    } else if ( st < 0 ) {
        tc = (lower - upper) / (-st) + 1;
    } else {                  // st > 0
        tc = (upper - lower) / st + 1;
    }
    if ( tc == 0 ) {
        KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
        // free the pattern task and exit; nothing is executed for a zero-trip loop
        __kmp_task_start( gtid, task, current_task );
        __kmp_task_finish( gtid, task, current_task );
        return;
    }

    // compute num_tasks/grainsize based on the input provided
    switch( sched ) {
    case 0: // no schedule clause specified; try to schedule (team_size * 10) tasks
        grainsize = thread->th.th_team_nproc * 10;
        // fall through to the num_tasks case
    case 2: // num_tasks provided
        if( grainsize > tc ) {
            num_tasks = tc;    // too many tasks requested; adjust values
            grainsize = 1;
            extras = 0;
        } else {
            num_tasks = grainsize;
            grainsize = tc / num_tasks;
            extras = tc % num_tasks;
        }
        break;
    case 1: // grainsize provided
        if( grainsize > tc ) {
            num_tasks = 1;     // too big a grainsize requested; adjust values
            grainsize = tc;
            extras = 0;
        } else {
            num_tasks = tc / grainsize;
            grainsize = tc / num_tasks;   // adjust grainsize for a balanced distribution of iterations
            extras = tc % num_tasks;
        }
        break;
    default:
        KMP_ASSERT2(0, "unknown scheduling of taskloop");
    }
    KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
    KMP_DEBUG_ASSERT(num_tasks > extras);
    KMP_DEBUG_ASSERT(num_tasks > 0);
    KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
                  gtid, num_tasks, grainsize, extras));

    // Main loop: launch num_tasks tasks, assigning grainsize (or grainsize+1) iterations to each.
    for( i = 0; i < num_tasks; ++i ) {
        kmp_uint64 chunk_minus_1;
        if( extras == 0 ) {
            chunk_minus_1 = grainsize - 1;
        } else {
            chunk_minus_1 = grainsize;
            --extras;   // the first 'extras' tasks get one extra iteration
        }
        upper = lower + st * chunk_minus_1;   // adjust the upper bound for this task
        if( i == num_tasks - 1 ) {
            // scheduling the last task: set the lastprivate flag
            lastpriv = 1;
            if( st == 1 )
                KMP_DEBUG_ASSERT(upper == *ub);
            else if( st > 0 )
                KMP_DEBUG_ASSERT(upper+st > *ub);
            else
                KMP_DEBUG_ASSERT(upper+st < *ub);
        }
        next_task = __kmp_task_dup_alloc(thread, task);   // allocate a new task from the pattern
        *(kmp_uint64*)((char*)next_task + lower_offset) = lower;   // adjust the task-specific bounds
        *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
        if( ptask_dup != NULL )
            ptask_dup(next_task, task, lastpriv);   // set lastprivate flag, construct firstprivates, etc.
        KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
                      gtid, next_task, lower, upper, lower_offset, upper_offset));
        __kmp_omp_task(gtid, next_task, true);   // schedule the new task
        lower = upper + st;   // adjust the lower bound for the next task
    }
    // Free the pattern task: it is started and finished only for bookkeeping, never executed.
    __kmp_task_start( gtid, task, current_task );
    __kmp_task_finish( gtid, task, current_task );
}
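
//------------------------------------------------------------------------------
// __kmpc_taskloop: entry point for the taskloop construct.  Marks the pattern task
// serial if the if-clause evaluated to 0, wraps the work in a taskgroup unless
// nogroup was specified, and delegates chunking to __kmp_taskloop_linear.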
void
__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    KMP_DEBUG_ASSERT( task != NULL );

    KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
                  gtid, taskdata, *lb, *ub, st, grainsize, sched));

    // Check the if-clause value first.
    if( if_val == 0 ) {   // if(0) specified, mark the task as serial
        taskdata->td_flags.task_serial = 1;
        taskdata->td_flags.tiedness = TASK_TIED;   // a serial task cannot be untied
    }
    if( nogroup == 0 ) {
        __kmpc_taskgroup( loc, gtid );
    }

    __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );

    if( nogroup == 0 ) {
        __kmpc_end_taskgroup( loc, gtid );
    }
    KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
// KMP_COUNT_BLOCK(name): statistics macro from kmp_stats.h that increments the specified counter (name).