LLVM OpenMP* Runtime Library
kmp_sched.cpp
1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16  it may change values between parallel regions. __kmp_max_nth
17  is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
#ifdef KMP_DEBUG
// Conversion-specifier suffixes, one per integer type, that the debug traces in
// this file splice into their format strings through __kmp_str_format
// (d, u, lld, llu, ld).
const char *traits_t<int>::spec = "d";
const char *traits_t<unsigned int>::spec = "u";
const char *traits_t<long long>::spec = "lld";
const char *traits_t<unsigned long long>::spec = "llu";
const char *traits_t<long>::spec = "ld";
#endif // KMP_DEBUG
40 
#if KMP_STATS_ENABLED
// Records how many iterations the bounds currently stored in *plower/*pupper
// describe (inclusive bounds, stepping by incr) under the statistic `stat`, and
// then pops the current partitioned timer.
//
// The macro reads `plower`, `pupper` and `incr` from the scope it is expanded
// in. It is wrapped in do { ... } while (0) so that `KMP_STATS_LOOP_END(x);`
// is a single statement and stays valid in an unbraced if/else.
#define KMP_STATS_LOOP_END(stat)                                               \
  do {                                                                         \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  } while (0)
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65 static inline void check_loc(ident_t *&loc) {
66  if (loc == NULL)
67  loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
// Computes the bounds of the calling thread's share of a statically scheduled
// loop.
//
// loc         source location; used for consistency-check errors, OMPT work
//             type detection and ITT metadata (may be NULL)
// global_tid  global thread id of the caller
// schedtype   static schedule kind; values above kmp_ord_upper are treated as
//             "distribute" schedules and converted to the matching loop kind
// plastiter   out: last-iteration flag for this thread
// plower      in: lower bound of the whole loop; out: lower bound for this
//             thread
// pupper      in: inclusive upper bound of the whole loop; out: upper bound
//             for this thread
// pstride     out: stride value for the chosen schedule (see each case)
// incr        loop increment (zero is reported as an error when consistency
//             checking is enabled)
// chunk       chunk size for the chunked schedules
// codeptr     (OMPT builds only) code address passed to the OMPT work callback
//
// Zero-trip loops, serialized teams and one-thread teams return early and
// leave *plower/*pupper describing the whole iteration space.
70 template <typename T>
71 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
72  kmp_int32 schedtype, kmp_int32 *plastiter,
73  T *plower, T *pupper,
74  typename traits_t<T>::signed_t *pstride,
75  typename traits_t<T>::signed_t incr,
76  typename traits_t<T>::signed_t chunk
77 #if OMPT_SUPPORT && OMPT_OPTIONAL
78  ,
79  void *codeptr
80 #endif
81 ) {
82  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
83  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
84  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
85 
86  typedef typename traits_t<T>::unsigned_t UT;
87  typedef typename traits_t<T>::signed_t ST;
88  /* this all has to be changed back to TID and such.. */
89  kmp_int32 gtid = global_tid;
90  kmp_uint32 tid;
91  kmp_uint32 nth;
92  UT trip_count;
93  kmp_team_t *team;
94  __kmp_assert_valid_gtid(gtid);
95  kmp_info_t *th = __kmp_threads[gtid];
96 
97 #if OMPT_SUPPORT && OMPT_OPTIONAL
98  ompt_team_info_t *team_info = NULL;
99  ompt_task_info_t *task_info = NULL;
100  ompt_work_t ompt_work_type = ompt_work_loop;
101 
// Shared by all callers; the compare-and-store below is presumably meant to
// let only the first caller emit the outdated-workshare warning.
102  static kmp_int8 warn = 0;
103 
104  if (ompt_enabled.ompt_callback_work) {
105  // Only fully initialize variables needed by OMPT if OMPT is enabled.
106  team_info = __ompt_get_teaminfo(0, NULL);
107  task_info = __ompt_get_task_info_object(0);
108  // Determine workshare type
109  if (loc != NULL) {
110  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
111  ompt_work_type = ompt_work_loop;
112  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
113  ompt_work_type = ompt_work_sections;
114  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
115  ompt_work_type = ompt_work_distribute;
116  } else {
// None of the known workshare bits is set in loc->flags: keep the default
// ompt_work_loop and warn.
117  kmp_int8 bool_res =
118  KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
119  if (bool_res)
120  KMP_WARNING(OmptOutdatedWorkshare);
121  }
122  KMP_DEBUG_ASSERT(ompt_work_type);
123  }
124  }
125 #endif
126 
127  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
128  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
129 #ifdef KMP_DEBUG
130  {
131  char *buff;
132  // create format specifiers before the debug output
133  buff = __kmp_str_format(
134  "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
135  " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
136  traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
137  traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
138  KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
139  *pstride, incr, chunk));
140  __kmp_str_free(&buff);
141  }
142 #endif
143 
144  if (__kmp_env_consistency_check) {
145  __kmp_push_workshare(global_tid, ct_pdo, loc);
146  if (incr == 0) {
147  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
148  loc);
149  }
150  }
151  /* special handling for zero-trip loops */
// The bounds are "reversed" relative to the sign of incr: report no last
// iteration, store incr as a placeholder stride and return.
152  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
153  if (plastiter != NULL)
154  *plastiter = FALSE;
155  /* leave pupper and plower set to entire iteration space */
156  *pstride = incr; /* value should never be used */
157 // *plower = *pupper - incr;
158 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
159 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
160 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
161 #ifdef KMP_DEBUG
162  {
163  char *buff;
164  // create format specifiers before the debug output
165  buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
166  "lower=%%%s upper=%%%s stride = %%%s "
167  "signed?<%s>, loc = %%s\n",
168  traits_t<T>::spec, traits_t<T>::spec,
169  traits_t<ST>::spec, traits_t<T>::spec);
// loc->psource is printed below, so make sure loc is not NULL first.
170  check_loc(loc);
171  KD_TRACE(100,
172  (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
173  __kmp_str_free(&buff);
174  }
175 #endif
176  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
177 
178 #if OMPT_SUPPORT && OMPT_OPTIONAL
179  if (ompt_enabled.ompt_callback_work) {
// The fifth argument (0 here) is the iteration count reported to the tool.
180  ompt_callbacks.ompt_callback(ompt_callback_work)(
181  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
182  &(task_info->task_data), 0, codeptr);
183  }
184 #endif
185  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
186  return;
187  }
188 
189  // Although there are schedule enumerations above kmp_ord_upper which are not
190  // schedules for "distribute", the only ones which are useful are dynamic, so
191  // cannot be seen here, since this codepath is only executed for static
192  // schedules.
193  if (schedtype > kmp_ord_upper) {
194  // we are in DISTRIBUTE construct
195  schedtype += kmp_sch_static -
196  kmp_distribute_static; // AC: convert to usual schedule type
// With t_serialized > 1 the thread's own team is used with tid 0; otherwise
// the work is split across the parent team, indexed by this team's
// t_master_tid.
197  if (th->th.th_team->t.t_serialized > 1) {
198  tid = 0;
199  team = th->th.th_team;
200  } else {
201  tid = th->th.th_team->t.t_master_tid;
202  team = th->th.th_team->t.t_parent;
203  }
204  } else {
205  tid = __kmp_tid_from_gtid(global_tid);
206  team = th->th.th_team;
207  }
208 
209  /* determine if "for" loop is an active worksharing construct */
210  if (team->t.t_serialized) {
211  /* serialized parallel, each thread executes whole iteration space */
212  if (plastiter != NULL)
213  *plastiter = TRUE;
214  /* leave pupper and plower set to entire iteration space */
// Stride is the signed extent of the whole range: positive for incr > 0,
// negative otherwise.
215  *pstride =
216  (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
217 
218 #ifdef KMP_DEBUG
219  {
220  char *buff;
221  // create format specifiers before the debug output
222  buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
223  "lower=%%%s upper=%%%s stride = %%%s\n",
224  traits_t<T>::spec, traits_t<T>::spec,
225  traits_t<ST>::spec);
226  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
227  __kmp_str_free(&buff);
228  }
229 #endif
230  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
231 
232 #if OMPT_SUPPORT && OMPT_OPTIONAL
233  if (ompt_enabled.ompt_callback_work) {
234  ompt_callbacks.ompt_callback(ompt_callback_work)(
235  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
236  &(task_info->task_data), *pstride, codeptr);
237  }
238 #endif
239  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
240  return;
241  }
242  nth = team->t.t_nproc;
// A team of one thread is handled exactly like the serialized case above.
243  if (nth == 1) {
244  if (plastiter != NULL)
245  *plastiter = TRUE;
246  *pstride =
247  (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
248 #ifdef KMP_DEBUG
249  {
250  char *buff;
251  // create format specifiers before the debug output
252  buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
253  "lower=%%%s upper=%%%s stride = %%%s\n",
254  traits_t<T>::spec, traits_t<T>::spec,
255  traits_t<ST>::spec);
256  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
257  __kmp_str_free(&buff);
258  }
259 #endif
260  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
261 
262 #if OMPT_SUPPORT && OMPT_OPTIONAL
263  if (ompt_enabled.ompt_callback_work) {
264  ompt_callbacks.ompt_callback(ompt_callback_work)(
265  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
266  &(task_info->task_data), *pstride, codeptr);
267  }
268 #endif
269  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
270  return;
271  }
272 
273  /* compute trip count */
// Bounds are inclusive, hence the "+ 1". The general cases divide in the
// unsigned type UT.
274  if (incr == 1) {
275  trip_count = *pupper - *plower + 1;
276  } else if (incr == -1) {
277  trip_count = *plower - *pupper + 1;
278  } else if (incr > 0) {
279  // upper-lower can exceed the limit of signed type
280  trip_count = (UT)(*pupper - *plower) / incr + 1;
281  } else {
282  trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
283  }
284 
285 #if KMP_STATS_ENABLED
286  if (KMP_MASTER_GTID(gtid)) {
287  KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
288  }
289 #endif
290 
291  if (__kmp_env_consistency_check) {
292  /* tripcount overflow? */
// A zero trip count with distinct bounds means the "+ 1" above wrapped around.
293  if (trip_count == 0 && *pupper != *plower) {
294  __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
295  loc);
296  }
297  }
298 
299  /* compute remaining parameters */
300  switch (schedtype) {
301  case kmp_sch_static: {
// Fewer iterations than threads: thread `tid` gets exactly iteration `tid`;
// the remaining threads get an empty range.
302  if (trip_count < nth) {
303  KMP_DEBUG_ASSERT(
304  __kmp_static == kmp_sch_static_greedy ||
305  __kmp_static ==
306  kmp_sch_static_balanced); // Unknown static scheduling type.
307  if (tid < trip_count) {
308  *pupper = *plower = *plower + tid * incr;
309  } else {
310  // set bounds so non-active threads execute no iterations
311  *plower = *pupper + (incr > 0 ? 1 : -1);
312  }
313  if (plastiter != NULL)
314  *plastiter = (tid == trip_count - 1);
315  } else {
316  if (__kmp_static == kmp_sch_static_balanced) {
// Balanced: the first `extras` threads get small_chunk + 1 iterations, the
// others small_chunk; the last thread owns the last iteration.
317  UT small_chunk = trip_count / nth;
318  UT extras = trip_count % nth;
319  *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
320  *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
321  if (plastiter != NULL)
322  *plastiter = (tid == nth - 1);
323  } else {
// Greedy: every thread is offered ceil(trip_count / nth) iterations, expressed
// here already multiplied by incr.
324  T big_chunk_inc_count =
325  (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
326  T old_upper = *pupper;
327 
328  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
329  // Unknown static scheduling type.
330 
331  *plower += tid * big_chunk_inc_count;
332  *pupper = *plower + big_chunk_inc_count - incr;
// If the upper bound wrapped past the lower one, saturate it at the type
// limit; then clamp it to the loop's original upper bound.
333  if (incr > 0) {
334  if (*pupper < *plower)
335  *pupper = traits_t<T>::max_value;
336  if (plastiter != NULL)
337  *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
338  if (*pupper > old_upper)
339  *pupper = old_upper; // tracker C73258
340  } else {
341  if (*pupper > *plower)
342  *pupper = traits_t<T>::min_value;
343  if (plastiter != NULL)
344  *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
345  if (*pupper < old_upper)
346  *pupper = old_upper; // tracker C73258
347  }
348  }
349  }
350  *pstride = trip_count;
351  break;
352  }
353  case kmp_sch_static_chunked: {
354  ST span;
355  UT nchunks;
// Clamp chunk to the range [1, trip_count].
356  if (chunk < 1)
357  chunk = 1;
358  else if ((UT)chunk > trip_count)
359  chunk = trip_count;
// nchunks = ceil(trip_count / chunk); span is the chunk length in index units.
360  nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
361  span = chunk * incr;
362  if (nchunks < nth) {
// Fewer chunks than threads: threads with tid >= nchunks get an empty range.
363  *pstride = span * nchunks;
364  if (tid < nchunks) {
365  *plower = *plower + (span * tid);
366  *pupper = *plower + span - incr;
367  } else {
368  *plower = *pupper + (incr > 0 ? 1 : -1);
369  }
370  } else {
371  *pstride = span * nth;
372  *plower = *plower + (span * tid);
373  *pupper = *plower + span - incr;
374  }
// The last chunk has index nchunks - 1; it belongs to thread (nchunks - 1) % nth.
375  if (plastiter != NULL)
376  *plastiter = (tid == (nchunks - 1) % nth);
377  break;
378  }
379  case kmp_sch_static_balanced_chunked: {
380  T old_upper = *pupper;
381  // round up to make sure the chunk is enough to cover all iterations
382  UT span = (trip_count + nth - 1) / nth;
383 
384  // perform chunk adjustment
// NOTE(review): the mask arithmetic rounds span up to a multiple of chunk only
// if chunk is a power of two - confirm that callers guarantee this.
385  chunk = (span + chunk - 1) & ~(chunk - 1);
386 
387  span = chunk * incr;
388  *plower = *plower + (span * tid);
389  *pupper = *plower + span - incr;
390  if (incr > 0) {
391  if (*pupper > old_upper)
392  *pupper = old_upper;
393  } else if (*pupper < old_upper)
394  *pupper = old_upper;
395 
396  if (plastiter != NULL)
397  *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
398  break;
399  }
400  default:
401  KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
402  break;
403  }
404 
405 #if USE_ITT_BUILD
406  // Report loop metadata
407  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
408  __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
409  team->t.t_active_level == 1) {
410  kmp_uint64 cur_chunk = chunk;
411  check_loc(loc);
412  // Calculate chunk in case it was not specified; it is specified for
413  // kmp_sch_static_chunked
414  if (schedtype == kmp_sch_static) {
415  cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
416  }
417  // 0 - "static" schedule
418  __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
419  }
420 #endif
421 #ifdef KMP_DEBUG
422  {
423  char *buff;
424  // create format specifiers before the debug output
425  buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
426  "upper=%%%s stride = %%%s signed?<%s>\n",
427  traits_t<T>::spec, traits_t<T>::spec,
428  traits_t<ST>::spec, traits_t<T>::spec);
429  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
430  __kmp_str_free(&buff);
431  }
432 #endif
433  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
434 
435 #if OMPT_SUPPORT && OMPT_OPTIONAL
436  if (ompt_enabled.ompt_callback_work) {
437  ompt_callbacks.ompt_callback(ompt_callback_work)(
438  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
439  &(task_info->task_data), trip_count, codeptr);
440  }
441 #endif
442 
443  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
444  return;
445 }
446 
// Computes, in one call, the calling team's chunk of a loop and the calling
// thread's share of that chunk.
//
// loc         source location used for consistency-check errors
// gtid        global thread id of the caller
// schedule    schedule of the inner loop (kmp_sch_static or
//             kmp_sch_static_chunked; anything else asserts)
// plastiter   out: last-iteration flag; set for the team first and then
//             cleared for threads that do not satisfy the per-thread test
// plower      in: lower bound of the whole loop; out: lower bound for this
//             thread
// pupper      in: inclusive upper bound of the whole loop; out: upper bound
//             for this thread
// pupperDist  out: upper bound of the team's chunk
// pstride     out: stride; only the chunked schedule writes a value other than
//             the initial *pupper - *plower
// incr        loop increment
// chunk       chunk size for kmp_sch_static_chunked
447 template <typename T>
448 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
449  kmp_int32 schedule, kmp_int32 *plastiter,
450  T *plower, T *pupper, T *pupperDist,
451  typename traits_t<T>::signed_t *pstride,
452  typename traits_t<T>::signed_t incr,
453  typename traits_t<T>::signed_t chunk) {
454  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
455  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
456  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
457  typedef typename traits_t<T>::unsigned_t UT;
458  typedef typename traits_t<T>::signed_t ST;
459  kmp_uint32 tid;
460  kmp_uint32 nth;
461  kmp_uint32 team_id;
462  kmp_uint32 nteams;
463  UT trip_count;
464  kmp_team_t *team;
465  kmp_info_t *th;
466 
467  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
468  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
469  __kmp_assert_valid_gtid(gtid);
470 #ifdef KMP_DEBUG
471  {
472  char *buff;
473  // create format specifiers before the debug output
474  buff = __kmp_str_format(
475  "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
476  "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
477  traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
478  traits_t<ST>::spec, traits_t<T>::spec);
479  KD_TRACE(100,
480  (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
481  __kmp_str_free(&buff);
482  }
483 #endif
484 
485  if (__kmp_env_consistency_check) {
486  __kmp_push_workshare(gtid, ct_pdo, loc);
487  if (incr == 0) {
488  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
489  loc);
490  }
491  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
492  // The loop is illegal.
493  // Some zero-trip loops maintained by compiler, e.g.:
494  // for(i=10;i<0;++i) // lower >= upper - run-time check
495  // for(i=0;i>10;--i) // lower <= upper - run-time check
496  // for(i=0;i>10;++i) // incr > 0 - compile-time check
497  // for(i=10;i<0;--i) // incr < 0 - compile-time check
498  // Compiler does not check the following illegal loops:
499  // for(i=0;i<10;i+=incr) // where incr<0
500  // for(i=10;i>0;i-=incr) // where incr<0
501  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
502  }
503  }
504  tid = __kmp_tid_from_gtid(gtid);
505  th = __kmp_threads[gtid];
506  nth = th->th.th_team_nproc;
507  team = th->th.th_team;
508  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
509  nteams = th->th.th_teams_size.nteams;
// The team's index within the league is taken from its t_master_tid.
510  team_id = team->t.t_master_tid;
511  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
512 
513  // compute global trip count
514  if (incr == 1) {
515  trip_count = *pupper - *plower + 1;
516  } else if (incr == -1) {
517  trip_count = *plower - *pupper + 1;
518  } else if (incr > 0) {
519  // upper-lower can exceed the limit of signed type
520  trip_count = (UT)(*pupper - *plower) / incr + 1;
521  } else {
522  trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
523  }
524 
525  *pstride = *pupper - *plower; // just in case (can be unused)
526  if (trip_count <= nteams) {
527  KMP_DEBUG_ASSERT(
528  __kmp_static == kmp_sch_static_greedy ||
529  __kmp_static ==
530  kmp_sch_static_balanced); // Unknown static scheduling type.
531  // only primary threads of some teams get single iteration, other threads
532  // get nothing
533  if (team_id < trip_count && tid == 0) {
534  *pupper = *pupperDist = *plower = *plower + team_id * incr;
535  } else {
536  *pupperDist = *pupper;
537  *plower = *pupper + incr; // compiler should skip loop body
538  }
539  if (plastiter != NULL)
540  *plastiter = (tid == 0 && team_id == trip_count - 1);
541  } else {
542  // Get the team's chunk first (each team gets at most one chunk)
543  if (__kmp_static == kmp_sch_static_balanced) {
// Balanced: the first `extras` teams get chunkD + 1 iterations, the others
// chunkD.
544  UT chunkD = trip_count / nteams;
545  UT extras = trip_count % nteams;
546  *plower +=
547  incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
548  *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
549  if (plastiter != NULL)
550  *plastiter = (team_id == nteams - 1);
551  } else {
// Greedy: each team is offered ceil(trip_count / nteams) iterations,
// expressed here already multiplied by incr.
552  T chunk_inc_count =
553  (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
554  T upper = *pupper;
555  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
556  // Unknown static scheduling type.
557  *plower += team_id * chunk_inc_count;
558  *pupperDist = *plower + chunk_inc_count - incr;
559  // Check/correct bounds if needed
560  if (incr > 0) {
561  if (*pupperDist < *plower)
562  *pupperDist = traits_t<T>::max_value;
563  if (plastiter != NULL)
564  *plastiter = *plower <= upper && *pupperDist > upper - incr;
565  if (*pupperDist > upper)
566  *pupperDist = upper; // tracker C73258
567  if (*plower > *pupperDist) {
568  *pupper = *pupperDist; // no iterations available for the team
569  goto end;
570  }
571  } else {
572  if (*pupperDist > *plower)
573  *pupperDist = traits_t<T>::min_value;
574  if (plastiter != NULL)
575  *plastiter = *plower >= upper && *pupperDist < upper - incr;
576  if (*pupperDist < upper)
577  *pupperDist = upper; // tracker C73258
578  if (*plower < *pupperDist) {
579  *pupper = *pupperDist; // no iterations available for the team
580  goto end;
581  }
582  }
583  }
584  // Get the parallel loop chunk now (for thread)
585  // compute trip count for team's chunk
// From here on trip_count is the size of the team's chunk
// [*plower, *pupperDist], not of the whole loop.
586  if (incr == 1) {
587  trip_count = *pupperDist - *plower + 1;
588  } else if (incr == -1) {
589  trip_count = *plower - *pupperDist + 1;
590  } else if (incr > 1) {
591  // upper-lower can exceed the limit of signed type
592  trip_count = (UT)(*pupperDist - *plower) / incr + 1;
593  } else {
594  trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
595  }
596  KMP_DEBUG_ASSERT(trip_count);
597  switch (schedule) {
598  case kmp_sch_static: {
599  if (trip_count <= nth) {
600  KMP_DEBUG_ASSERT(
601  __kmp_static == kmp_sch_static_greedy ||
602  __kmp_static ==
603  kmp_sch_static_balanced); // Unknown static scheduling type.
604  if (tid < trip_count)
605  *pupper = *plower = *plower + tid * incr;
606  else
607  *plower = *pupper + incr; // no iterations available
// *plastiter already holds the team-level answer; it is only ever cleared
// here, never set.
608  if (plastiter != NULL)
609  if (*plastiter != 0 && !(tid == trip_count - 1))
610  *plastiter = 0;
611  } else {
612  if (__kmp_static == kmp_sch_static_balanced) {
613  UT chunkL = trip_count / nth;
614  UT extras = trip_count % nth;
615  *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
616  *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
617  if (plastiter != NULL)
618  if (*plastiter != 0 && !(tid == nth - 1))
619  *plastiter = 0;
620  } else {
621  T chunk_inc_count =
622  (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
623  T upper = *pupperDist;
624  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
625  // Unknown static scheduling type.
626  *plower += tid * chunk_inc_count;
627  *pupper = *plower + chunk_inc_count - incr;
// Same saturate-then-clamp handling as for the team chunk, but relative to the
// team's upper bound.
628  if (incr > 0) {
629  if (*pupper < *plower)
630  *pupper = traits_t<T>::max_value;
631  if (plastiter != NULL)
632  if (*plastiter != 0 &&
633  !(*plower <= upper && *pupper > upper - incr))
634  *plastiter = 0;
635  if (*pupper > upper)
636  *pupper = upper; // tracker C73258
637  } else {
638  if (*pupper > *plower)
639  *pupper = traits_t<T>::min_value;
640  if (plastiter != NULL)
641  if (*plastiter != 0 &&
642  !(*plower >= upper && *pupper < upper - incr))
643  *plastiter = 0;
644  if (*pupper < upper)
645  *pupper = upper; // tracker C73258
646  }
647  }
648  }
649  break;
650  }
651  case kmp_sch_static_chunked: {
652  ST span;
653  if (chunk < 1)
654  chunk = 1;
// span is one chunk in index units; a thread's next chunk is nth spans away.
655  span = chunk * incr;
656  *pstride = span * nth;
657  *plower = *plower + (span * tid);
658  *pupper = *plower + span - incr;
659  if (plastiter != NULL)
660  if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
661  *plastiter = 0;
662  break;
663  }
664  default:
665  KMP_ASSERT2(0,
666  "__kmpc_dist_for_static_init: unknown loop scheduling type");
667  break;
668  }
669  }
// Common exit; also the target of the "no iterations available for the team"
// jumps above.
670 end:;
671 #ifdef KMP_DEBUG
672  {
673  char *buff;
674  // create format specifiers before the debug output
675  buff = __kmp_str_format(
676  "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
677  "stride=%%%s signed?<%s>\n",
678  traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
679  traits_t<ST>::spec, traits_t<T>::spec);
680  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
681  __kmp_str_free(&buff);
682  }
683 #endif
684  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
685  KMP_STATS_LOOP_END(OMP_distribute_iterations);
686  return;
687 }
688 
689 template <typename T>
690 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
691  kmp_int32 *p_last, T *p_lb, T *p_ub,
692  typename traits_t<T>::signed_t *p_st,
693  typename traits_t<T>::signed_t incr,
694  typename traits_t<T>::signed_t chunk) {
695  // The routine returns the first chunk distributed to the team and
696  // stride for next chunks calculation.
697  // Last iteration flag set for the team that will execute
698  // the last iteration of the loop.
699  // The routine is called for dist_schedule(static,chunk) only.
700  typedef typename traits_t<T>::unsigned_t UT;
701  typedef typename traits_t<T>::signed_t ST;
702  kmp_uint32 team_id;
703  kmp_uint32 nteams;
704  UT trip_count;
705  T lower;
706  T upper;
707  ST span;
708  kmp_team_t *team;
709  kmp_info_t *th;
710 
711  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
712  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
713  __kmp_assert_valid_gtid(gtid);
714 #ifdef KMP_DEBUG
715  {
716  char *buff;
717  // create format specifiers before the debug output
718  buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
719  "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
720  traits_t<T>::spec, traits_t<T>::spec,
721  traits_t<ST>::spec, traits_t<ST>::spec,
722  traits_t<T>::spec);
723  KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
724  __kmp_str_free(&buff);
725  }
726 #endif
727 
728  lower = *p_lb;
729  upper = *p_ub;
730  if (__kmp_env_consistency_check) {
731  if (incr == 0) {
732  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
733  loc);
734  }
735  if (incr > 0 ? (upper < lower) : (lower < upper)) {
736  // The loop is illegal.
737  // Some zero-trip loops maintained by compiler, e.g.:
738  // for(i=10;i<0;++i) // lower >= upper - run-time check
739  // for(i=0;i>10;--i) // lower <= upper - run-time check
740  // for(i=0;i>10;++i) // incr > 0 - compile-time check
741  // for(i=10;i<0;--i) // incr < 0 - compile-time check
742  // Compiler does not check the following illegal loops:
743  // for(i=0;i<10;i+=incr) // where incr<0
744  // for(i=10;i>0;i-=incr) // where incr<0
745  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
746  }
747  }
748  th = __kmp_threads[gtid];
749  team = th->th.th_team;
750  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
751  nteams = th->th.th_teams_size.nteams;
752  team_id = team->t.t_master_tid;
753  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
754 
755  // compute trip count
756  if (incr == 1) {
757  trip_count = upper - lower + 1;
758  } else if (incr == -1) {
759  trip_count = lower - upper + 1;
760  } else if (incr > 0) {
761  // upper-lower can exceed the limit of signed type
762  trip_count = (UT)(upper - lower) / incr + 1;
763  } else {
764  trip_count = (UT)(lower - upper) / (-incr) + 1;
765  }
766  if (chunk < 1)
767  chunk = 1;
768  span = chunk * incr;
769  *p_st = span * nteams;
770  *p_lb = lower + (span * team_id);
771  *p_ub = *p_lb + span - incr;
772  if (p_last != NULL)
773  *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
774  // Correct upper bound if needed
775  if (incr > 0) {
776  if (*p_ub < *p_lb) // overflow?
777  *p_ub = traits_t<T>::max_value;
778  if (*p_ub > upper)
779  *p_ub = upper; // tracker C73258
780  } else { // incr < 0
781  if (*p_ub > *p_lb)
782  *p_ub = traits_t<T>::min_value;
783  if (*p_ub < upper)
784  *p_ub = upper; // tracker C73258
785  }
786 #ifdef KMP_DEBUG
787  {
788  char *buff;
789  // create format specifiers before the debug output
790  buff =
791  __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
792  "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
793  traits_t<T>::spec, traits_t<T>::spec,
794  traits_t<ST>::spec, traits_t<ST>::spec);
795  KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
796  __kmp_str_free(&buff);
797  }
798 #endif
799 }
800 
801 //------------------------------------------------------------------------------
802 extern "C" {
824 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
825  kmp_int32 *plastiter, kmp_int32 *plower,
826  kmp_int32 *pupper, kmp_int32 *pstride,
827  kmp_int32 incr, kmp_int32 chunk) {
828  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
829  pupper, pstride, incr, chunk
830 #if OMPT_SUPPORT && OMPT_OPTIONAL
831  ,
832  OMPT_GET_RETURN_ADDRESS(0)
833 #endif
834  );
835 }
836 
840 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
841  kmp_int32 schedtype, kmp_int32 *plastiter,
842  kmp_uint32 *plower, kmp_uint32 *pupper,
843  kmp_int32 *pstride, kmp_int32 incr,
844  kmp_int32 chunk) {
845  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
846  pupper, pstride, incr, chunk
847 #if OMPT_SUPPORT && OMPT_OPTIONAL
848  ,
849  OMPT_GET_RETURN_ADDRESS(0)
850 #endif
851  );
852 }
853 
857 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
858  kmp_int32 *plastiter, kmp_int64 *plower,
859  kmp_int64 *pupper, kmp_int64 *pstride,
860  kmp_int64 incr, kmp_int64 chunk) {
861  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
862  pupper, pstride, incr, chunk
863 #if OMPT_SUPPORT && OMPT_OPTIONAL
864  ,
865  OMPT_GET_RETURN_ADDRESS(0)
866 #endif
867  );
868 }
869 
873 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
874  kmp_int32 schedtype, kmp_int32 *plastiter,
875  kmp_uint64 *plower, kmp_uint64 *pupper,
876  kmp_int64 *pstride, kmp_int64 incr,
877  kmp_int64 chunk) {
878  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
879  pupper, pstride, incr, chunk
880 #if OMPT_SUPPORT && OMPT_OPTIONAL
881  ,
882  OMPT_GET_RETURN_ADDRESS(0)
883 #endif
884  );
885 }
912 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
913  kmp_int32 schedule, kmp_int32 *plastiter,
914  kmp_int32 *plower, kmp_int32 *pupper,
915  kmp_int32 *pupperD, kmp_int32 *pstride,
916  kmp_int32 incr, kmp_int32 chunk) {
917  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
918  pupper, pupperD, pstride, incr, chunk);
919 }
920 
924 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
925  kmp_int32 schedule, kmp_int32 *plastiter,
926  kmp_uint32 *plower, kmp_uint32 *pupper,
927  kmp_uint32 *pupperD, kmp_int32 *pstride,
928  kmp_int32 incr, kmp_int32 chunk) {
929  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
930  pupper, pupperD, pstride, incr, chunk);
931 }
932 
936 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
937  kmp_int32 schedule, kmp_int32 *plastiter,
938  kmp_int64 *plower, kmp_int64 *pupper,
939  kmp_int64 *pupperD, kmp_int64 *pstride,
940  kmp_int64 incr, kmp_int64 chunk) {
941  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
942  pupper, pupperD, pstride, incr, chunk);
943 }
944 
948 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
949  kmp_int32 schedule, kmp_int32 *plastiter,
950  kmp_uint64 *plower, kmp_uint64 *pupper,
951  kmp_uint64 *pupperD, kmp_int64 *pstride,
952  kmp_int64 incr, kmp_int64 chunk) {
953  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
954  pupper, pupperD, pstride, incr, chunk);
955 }
960 //------------------------------------------------------------------------------
961 // Auxiliary routines for Distribute Parallel Loop construct implementation
962 // Transfer call to template< type T >
963 // __kmp_team_static_init( ident_t *loc, int gtid,
964 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
965 
986 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
987  kmp_int32 *p_lb, kmp_int32 *p_ub,
988  kmp_int32 *p_st, kmp_int32 incr,
989  kmp_int32 chunk) {
990  KMP_DEBUG_ASSERT(__kmp_init_serial);
991  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
992  chunk);
993 }
994 
998 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
999  kmp_uint32 *p_lb, kmp_uint32 *p_ub,
1000  kmp_int32 *p_st, kmp_int32 incr,
1001  kmp_int32 chunk) {
1002  KMP_DEBUG_ASSERT(__kmp_init_serial);
1003  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1004  chunk);
1005 }
1006 
1010 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1011  kmp_int64 *p_lb, kmp_int64 *p_ub,
1012  kmp_int64 *p_st, kmp_int64 incr,
1013  kmp_int64 chunk) {
1014  KMP_DEBUG_ASSERT(__kmp_init_serial);
1015  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1016  chunk);
1017 }
1018 
1022 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1023  kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1024  kmp_int64 *p_st, kmp_int64 incr,
1025  kmp_int64 chunk) {
1026  KMP_DEBUG_ASSERT(__kmp_init_serial);
1027  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1028  chunk);
1029 }
1034 } // extern "C"
@ KMP_IDENT_KMPC
Definition: kmp.h:196
@ KMP_IDENT_WORK_LOOP
Definition: kmp.h:214
@ KMP_IDENT_WORK_SECTIONS
Definition: kmp.h:216
@ KMP_IDENT_WORK_DISTRIBUTE
Definition: kmp.h:218
#define KMP_COUNT_VALUE(name, value)
Adds value to specified timer (name).
Definition: kmp_stats.h:895
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition: kmp_stats.h:908
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:857
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:936
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:840
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:1010
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:924
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:1022
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:873
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:998
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:912
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:824
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:986
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:948
@ kmp_sch_static
Definition: kmp.h:360
@ kmp_distribute_static
Definition: kmp.h:396
@ kmp_ord_upper
Definition: kmp.h:392
Definition: kmp.h:234
char const * psource
Definition: kmp.h:244
kmp_int32 flags
Definition: kmp.h:236