/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/* Static scheduling initialization.

  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
        it may change values between parallel regions. __kmp_max_nth
        is the largest value __kmp_nth may take, 1 is the smallest. */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
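
// Worked example of the trip-count formula used above (illustrative only):
// for lower=0, upper=9, incr=2 the count is (9-0)/2+1 = 5 (iterations
// 0,2,4,6,8); for lower=9, upper=0, incr=-3 it is (9-0)/3+1 = 4 (9,6,3,0).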

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use the default set above; warn once about outdated workshare info
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif
  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are
  // not schedules for "distribute", the only useful ones are dynamic, so they
  // cannot be seen here, since this code path is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
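  // Note (illustrative): computing the difference in the unsigned type UT
  // guards against signed overflow, e.g. for 32-bit T with
  // *plower = INT32_MIN and *pupper = INT32_MAX the difference only fits in
  // the unsigned type.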

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr; // no iterations for this thread
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
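        // Worked example (illustrative): trip_count=10, nth=4 gives
        // small_chunk=2, extras=2, so with *plower=0 and incr=1 the threads
        // get [0,2], [3,5], [6,7], [8,9]; the first 'extras' threads take
        // one extra iteration.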
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower) // overflow?
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower) // underflow?
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
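  // Note: the non-chunked static schedule above sets *pstride to the whole
  // trip count, so a compiler-generated "for (i = lb; i <= ub; i += st)"
  // wrapper makes exactly one pass over the thread's contiguous block.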
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
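  // Illustrative mapping for kmp_sch_static_chunked: with *plower=0, incr=1,
  // chunk=2 and nth=2, thread 0 first owns [0,1] and thread 1 owns [2,3];
  // each thread then advances by *pstride = span * nth = 4 to its next chunk
  // ([4,5] for thread 0, [6,7] for thread 1), until it passes the upper bound.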
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
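  // Chunk-adjustment example (the bit mask above assumes a power-of-two
  // chunk): span=10, chunk=4 gives (10+4-1) & ~(4-1) = 12, the smallest
  // multiple of 4 covering the per-thread span.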
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr < 0
      //   for(i=10;i>0;i-=incr)  // where incr < 0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
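    // Illustrative two-level split: trip_count=100, nteams=2, nth=4, incr=1
    // first gives team 0 the dist_chunk [0,49]; the switch below then splits
    // that chunk among the team's 4 threads, e.g. [0,12], [13,25], [26,37],
    // [38,49] under the balanced scheme.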
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower) // overflow?
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower) // underflow?
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr < 0
      //   for(i=10;i>0;i-=incr)  // where incr < 0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb) // underflow?
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
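  // Worked example (illustrative): lower=0, upper=99, incr=1, chunk=10 and
  // nteams=4 give *p_st=40; team 0 starts with [0,9], team 3 with [30,39],
  // and each team advances by *p_st. Team 1 owns the last chunk [90,99]
  // since ((100-1)/10) % 4 == 1.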
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc       Source code location
@param gtid      Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower    Pointer to the lower bound
@param pupper    Pointer to the upper bound
@param pstride   Pointer to the stride
@param incr      Loop increment
@param chunk     The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
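
// Illustrative lowering (a sketch, not the output of any particular
// compiler): a loop such as
//
//   #pragma omp for schedule(static)
//   for (int i = 0; i <= 99; ++i)
//     body(i);
//
// is typically compiled into a call to this entry point followed by the
// bounds-adjusted loop:
//
//   kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
//                            &st, 1, 1);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);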
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc       Source code location
@param gtid      Global thread id of this thread
@param schedule  Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower    Pointer to the lower bound
@param pupper    Pointer to the upper bound of loop chunk
@param pupperD   Pointer to the upper bound of dist_chunk
@param pstride   Pointer to the stride for parallel loop
@param incr      Loop increment
@param chunk     The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
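
// Illustrative use (a sketch, values assumed): for a composite
//   #pragma omp distribute parallel for dist_schedule(static)
// each thread calls the entry point once; *pupperD receives the end of the
// team's dist_chunk while *plower/*pupper bound the thread's own portion:
//
//   kmp_int32 last = 0, lb = 0, ub = 99, ubD = 0, st = 1;
//   __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb,
//                                 &ub, &ubD, &st, 1, 1);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);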
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING

@param loc    Source location
@param gtid   Global thread id
@param p_last Pointer to last iteration flag
@param p_lb   Pointer to lower bound
@param p_ub   Pointer to upper bound
@param p_st   Step (or increment if you prefer)
@param incr   Loop increment
@param chunk  The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/
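
// Illustrative use (a sketch, values assumed): for
//   #pragma omp distribute dist_schedule(static, 10)
// across 4 teams over iterations 0..99 with incr=1:
//
//   kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
//   __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, 10);
//   // team 0 now sees lb=0, ub=9, st=40 and later processes [40,49] and
//   // [80,89] by advancing its bounds by st.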
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"