/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 * it may change values between parallel regions. __kmp_max_nth
 * is the largest value __kmp_nth may take, 1 is the smallest.
 */

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_str.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_itt.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// template for type limits
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// debug print format specifiers for each instantiated type ( d, u, lld, llu )
char const * traits_t< int >::spec                = "d";
char const * traits_t< unsigned int >::spec       = "u";
char const * traits_t< long long >::spec          = "lld";
char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif

template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
                        *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr; /* value should never be used */
        // *plower = *pupper - incr;   // let the compiler bypass the illegal loop
        //                             // (like for(i=1;i<10;i--)).
        // THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP
        // LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    // Although there are schedule enumerations above kmp_ord_upper that are not
    // "distribute" schedules, the only useful ones there are dynamic, and those
    // can never be seen here, since this code path is executed only for static
    // schedules.
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;   // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team->t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if ( incr == -1 ) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }
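    // Illustrative example (expository comment, not in the upstream source):
    // for "for (i = 0; i <= 9; i += 2)" we have lower=0, upper=9, incr=2, so
    // trip_count = (UT)(9 - 0) / 2 + 1 = 5 iterations (i = 0,2,4,6,8). The
    // cast to the unsigned type UT keeps upper-lower from overflowing T.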

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    register UT small_chunk = trip_count / nth;
                    register UT extras      = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper  = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
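                    // Worked example (expository comment, not in the upstream
                    // source): trip_count=10, nth=4 gives small_chunk=2,
                    // extras=2, so tids 0 and 1 take 3 iterations each and
                    // tids 2 and 3 take 2 -- shares differ by at most one.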
                } else {
                    register T big_chunk_inc_count = ( trip_count/nth +
                        ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                    // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper  = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
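                    // Worked example (expository comment, not in the upstream
                    // source): trip_count=10, nth=4 under kmp_sch_static_greedy
                    // gives big_chunk_inc_count = ceil(10/4) = 3 (with incr=1);
                    // tids 0..2 take 3 iterations each, and the clipping against
                    // old_upper above leaves tid 3 the single remaining iteration.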
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
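            // Worked example (expository comment, not in the upstream source):
            // lower=0, upper=15, incr=1, chunk=4, nth=2: span=4, *pstride=8;
            // tid 0 starts on [0,3] and tid 1 on [4,7], and each thread steps
            // by *pstride to reach its next chunk ([8,11] and [12,15]).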
            break;
        }
#if OMP_45_ENABLED
    case kmp_sch_static_balanced_chunked:
        {
            register T old_upper = *pupper;
            // round up to make sure the chunk is enough to cover all iterations
            register UT span = (trip_count+nth-1) / nth;

            // perform chunk adjustment
            chunk = (span + chunk - 1) & ~(chunk-1);
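            // Expository note (not in the upstream source): the bit-mask above
            // rounds span up to a multiple of chunk and relies on chunk being
            // a power of two. E.g. trip_count=100, nth=8, chunk=4 gives
            // span = ceil(100/8) = 13, rounded up to a 16-iteration chunk.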

            span = chunk * incr;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if ( incr > 0 ) {
                if ( *pupper > old_upper ) *pupper = old_upper;
            } else
                if ( *pupper < old_upper ) *pupper = old_upper;

            if( plastiter != NULL )
                *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) );
            break;
        }
#endif
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}

template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t          *th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                        *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops are maintained by the compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0      - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0      - compile-time check
            // The compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if( incr == -1 ) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only the masters of some teams get a single iteration; other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr;  // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
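            // Worked example (expository comment, not in the upstream source):
            // trip_count=22, nteams=4 gives chunkD=5, extras=2; teams 0 and 1
            // each cover 6 iterations and teams 2 and 3 cover 5 -- the same
            // balanced split that is later applied per thread within a team.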
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
            // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if( incr == -1 ) {
            trip_count = *plower - *pupperDist + 1;
        } else if( incr > 0 ) {
            // upper-lower can exceed the limit of signed type
            trip_count = (UT)(*pupperDist - *plower) / incr + 1;
        } else {
            trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
            {
                if( trip_count <= nth ) {
                    KMP_DEBUG_ASSERT(
                        __kmp_static == kmp_sch_static_greedy || \
                        __kmp_static == kmp_sch_static_balanced
                    ); // Unknown static scheduling type.
                    if( tid < trip_count )
                        *pupper = *plower = *plower + tid * incr;
                    else
                        *plower = *pupper + incr;  // no iterations available
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                            *plastiter = 0;
                } else {
                    if( __kmp_static == kmp_sch_static_balanced ) {
                        register UT chunkL = trip_count / nth;
                        register UT extras = trip_count % nth;
                        *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                        *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !( tid == nth - 1 ) )
                                *plastiter = 0;
                    } else {
                        register T chunk_inc_count =
                            ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                        register T upper = *pupperDist;
                        KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                        *plower += tid * chunk_inc_count;
                        *pupper = *plower + chunk_inc_count - incr;
                        if( incr > 0 ) {
                            if( *pupper < *plower )
                                *pupper = i_maxmin< T >::mx;
                            if( plastiter != NULL )
                                if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                    *plastiter = 0;
                            if( *pupper > upper )
                                *pupper = upper; // tracker C73258
                        } else {
                            if( *pupper > *plower )
                                *pupper = i_maxmin< T >::mn;
                            if( plastiter != NULL )
                                if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                    *plastiter = 0;
                            if( *pupper < upper )
                                *pupper = upper; // tracker C73258
                        }
                    }
                }
                break;
            }
        case kmp_sch_static_chunked:
            {
                register ST span;
                if( chunk < 1 )
                    chunk = 1;
                span = chunk * incr;
                *pstride = span * nth;
                *plower = *plower + (span * tid);
                *pupper = *plower + span - incr;
                if( plastiter != NULL )
                    if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                        *plastiter = 0;
                break;
            }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}

template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and the
    // stride for computing the next chunks. The last-iteration flag is set
    // for the team that will execute the last iteration of the loop.
    // The routine is called for dist_schedule(static, chunk) only.
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops are maintained by the compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0      - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0      - compile-time check
            // The compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if( incr == -1 ) {
        trip_count = lower - upper + 1;
    } else if( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(upper - lower) / incr + 1;
    } else {
        trip_count = (UT)(lower - upper) / (-incr) + 1;
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
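    // Worked example (expository comment, not in the upstream source):
    // lower=0, upper=99, incr=1, chunk=10, nteams=4: span=10, *p_st=40, so
    // team 2 gets [20,29] first and then advances by *p_st; the chunk holding
    // the last iteration is number 99/10 = 9, owned by team 9 % 4 = 1.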
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb )  // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {  // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}

//--------------------------------------------------------------------------------------
extern "C" {

void
__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int32 *plower, kmp_int32 *pupper,
                          kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_int32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

void
__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint32 *plower, kmp_uint32 *pupper,
                           kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_uint32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

void
__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int64 *plower, kmp_int64 *pupper,
                          kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_int64 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

void
__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint64 *plower, kmp_uint64 *pupper,
                           kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_uint64 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
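
/*
 * Illustrative sketch (expository comment, not part of the original file):
 * for a loop such as
 *
 *     #pragma omp for schedule(static)
 *     for ( int i = lb; i <= ub; ++i ) ...
 *
 * a compiler typically emits something along these lines in the outlined
 * function, where "loc" is the compiler-generated ident_t for the construct:
 *
 *     kmp_int32 last = 0, lower = lb, upper = ub, stride = 1;
 *     __kmpc_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
 *                               &lower, &upper, &stride, 1, 1 );
 *     for ( kmp_int32 i = lower; i <= upper; ++i ) { ... }
 *     __kmpc_for_static_fini( &loc, gtid );
 *
 * Exact code generation varies by compiler; this only shows the calling
 * convention of the *_4 entry point above.
 */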

void
__kmpc_dist_for_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_int32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

void
__kmpc_dist_for_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_uint32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

void
__kmpc_dist_for_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_int64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

void
__kmpc_dist_for_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_uint64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

//-----------------------------------------------------------------------------------------
// Auxiliary routines for the Distribute Parallel Loop construct implementation.
// Transfer call to template< typename T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void
__kmpc_team_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

void
__kmpc_team_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

void
__kmpc_team_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

void
__kmpc_team_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

} // extern "C"