/*
 * kmp_gsupport.cpp
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_atomic.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

#define MKLOC(loc, routine)                                                    \
  static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};

#include "kmp_ftn_os.h"

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_barrier");
  KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmpc_barrier(&loc, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

// Mutual exclusion

// The symbol that icc/ifort generates for unnamed critical sections
// - .gomp_critical_user_ - is defined using .comm in any object that
// references it.  We can't reference it directly here in C code, as the
// symbol contains a ".".
//
// The RTL contains an assembly language definition of .gomp_critical_user_
// with another symbol, __kmp_unnamed_critical_addr, initialized with its
// address.
extern kmp_critical_name *__kmp_unnamed_critical_addr;

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_critical_start");
  KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
  int gtid = __kmp_get_gtid();
  MKLOC(loc, "GOMP_critical_end");
  KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_critical_name_start");
  KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
  __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {
  int gtid = __kmp_get_gtid();
  MKLOC(loc, "GOMP_critical_name_end");
  KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
  __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
}
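
// Illustration only (not part of the runtime): for a directive such as
//
//   #pragma omp critical(lock1)
//   { counter++; }
//
// the GCC codegen roughly emits
//
//   static void *gomp_critical_user_lock1;   // hypothetical name; the real
//                                            // symbol contains a "."
//   GOMP_critical_name_start(&gomp_critical_user_lock1);
//   counter++;
//   GOMP_critical_name_end(&gomp_critical_user_lock1);
//
// while an unnamed "#pragma omp critical" maps to GOMP_critical_start() /
// GOMP_critical_end() and the shared .gomp_critical_user_ symbol above.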

// The Gnu codegen tries to use locked operations to perform atomic updates
// inline.  If it can't, then it calls GOMP_atomic_start() before performing
// the update and GOMP_atomic_end() afterward, regardless of the data type.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));

#if OMPT_SUPPORT
  __ompt_thread_assign_wait_id(0);
#endif

  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
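
// Illustration only: assuming an accumulator that cannot be updated with a
// single locked instruction (e.g. a long double "sum"), GCC's fallback for
// "#pragma omp atomic" is roughly
//
//   GOMP_atomic_start();
//   sum += x;   // the protected update, independent of data type
//   GOMP_atomic_end();
//
// so all such updates serialize on the single global atomic lock.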

int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_single_start");
  KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
  // workshare when USE_CHECKS is defined.  We need to avoid the push,
  // as there is no corresponding GOMP_single_end() call.
  kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
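
// Illustration only: a sketch of how the GCC codegen typically drives
// GOMP_single_start() for "#pragma omp single" without copyprivate:
//
//   if (GOMP_single_start()) {
//     /* body, executed by the winning thread only */
//   }
//   GOMP_barrier();   // omitted when a nowait clause is present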

void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
  void *retval;
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_single_copy_start");
  KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // If this is the first thread to enter, return NULL.  The generated code will
  // then call GOMP_single_copy_end() for this thread only, with the
  // copyprivate data pointer as an argument.
  if (__kmp_enter_single(gtid, &loc, FALSE))
    return NULL;

// Wait for the first thread to set the copyprivate data pointer,
// and for all other threads to reach this point.

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  // Retrieve the value of the copyprivate data pointer, and wait for all
  // threads to do likewise, then return.
  retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
  return retval;
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));

  // Set the copyprivate data pointer for the team, then hit the barrier so
  // that the other threads will continue on and read it.  Hit another barrier
  // before continuing, so that we know the copyprivate data pointer has been
  // propagated to all threads before trying to reuse the t_copypriv_data field.
  __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
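
// Illustration only: a sketch of the copyprivate protocol the two entry
// points above implement for "#pragma omp single copyprivate(x)", with a
// shared variable x of type T (names are assumptions, not generated code):
//
//   T *p = (T *)GOMP_single_copy_start();
//   if (p == NULL) {              // winning thread
//     /* body; produces the value of x */
//     GOMP_single_copy_end(&x);   // publish, then hit the two barriers
//   } else {
//     x = *p;                     // every other thread copies the value
//   }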

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_ordered_start");
  KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_ordered(&loc, gtid);
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
  int gtid = __kmp_get_gtid();
  MKLOC(loc, "GOMP_ordered_end");
  KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_end_ordered(&loc, gtid);
}

// Dispatch macro defs
//
// They come in two flavors: 64-bit unsigned, and either 32-bit signed
// (IA-32 architecture) or 64-bit signed (Intel(R) 64).

#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
#else
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
#endif /* KMP_ARCH_X86 */

#define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u

// The parallel construct

#ifndef KMP_DEBUG
static
#endif /* KMP_DEBUG */
    void
    __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
                                 void *data) {
#if OMPT_SUPPORT
  kmp_info_t *thr;
  ompt_frame_t *ompt_frame;
  ompt_state_t enclosing_state;

  if (ompt_enabled.enabled) {
    // get pointer to thread data structure
    thr = __kmp_threads[*gtid];

    // save enclosing task state; set current state for task
    enclosing_state = thr->th.ompt_thread_info.state;
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;

    // set task frame
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  task(data);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // clear task frame
    ompt_frame->exit_frame = ompt_data_none;

    // restore enclosing state
    thr->th.ompt_thread_info.state = enclosing_state;
  }
#endif
}

#ifndef KMP_DEBUG
static
#endif /* KMP_DEBUG */
    void
    __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
                                          void (*task)(void *), void *data,
                                          unsigned num_threads, ident_t *loc,
                                          enum sched_type schedule, long start,
                                          long end, long incr,
                                          long chunk_size) {
  // Initialize the loop worksharing construct.

  KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
                    schedule != kmp_sch_static);

#if OMPT_SUPPORT
  kmp_info_t *thr;
  ompt_frame_t *ompt_frame;
  ompt_state_t enclosing_state;

  if (ompt_enabled.enabled) {
    thr = __kmp_threads[*gtid];
    // save enclosing task state; set current state for task
    enclosing_state = thr->th.ompt_thread_info.state;
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;

    // set task frame
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  // Now invoke the microtask.
  task(data);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // clear task frame
    ompt_frame->exit_frame = ompt_data_none;

    // reset enclosing state
    thr->th.ompt_thread_info.state = enclosing_state;
  }
#endif
}

#ifndef KMP_DEBUG
static
#endif /* KMP_DEBUG */
    void
    __kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *),
                         microtask_t wrapper, int argc, ...) {
  int rc;
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);

  va_list ap;
  va_start(ap, argc);

  rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
                       __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                       &ap
#else
                       ap
#endif
                       );

  va_end(ap);

  if (rc) {
    __kmp_run_before_invoked_task(gtid, tid, thr, team);
  }

#if OMPT_SUPPORT
  int ompt_team_size;
  if (ompt_enabled.enabled) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);

    // implicit task callback
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid),
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
      task_info->thread_num = __kmp_tid_from_gtid(gtid);
    }
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
  }
#endif
}

static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid,
                                           void (*task)(void *)) {
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_serialized_parallel(loc, gtid);
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
                                                       void *data,
                                                       unsigned num_threads) {
  int gtid = __kmp_entry_gtid();

#if OMPT_SUPPORT
  ompt_frame_t *parent_frame, *frame;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif

  MKLOC(loc, "GOMP_parallel_start");
  KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));

  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
    if (num_threads != 0) {
      __kmp_push_num_threads(&loc, gtid, num_threads);
    }
    __kmp_GOMP_fork_call(&loc, gtid, task,
                         (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
                         data);
  } else {
    __kmp_GOMP_serialized_parallel(&loc, gtid, task);
  }

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
    frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
  int gtid = __kmp_get_gtid();
  kmp_info_t *thr;

  thr = __kmp_threads[gtid];

  MKLOC(loc, "GOMP_parallel_end");
  KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));

  if (!thr->th.th_team->t.t_serialized) {
    __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
                                 thr->th.th_team);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      // The implicit task is finished here; in the barrier we might schedule
      // deferred tasks, and these do not see the implicit task on the stack.
      OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
    }
#endif

    __kmp_join_call(&loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_gnu
#endif
                    );
  } else {
    __kmpc_end_serialized_parallel(&loc, gtid);
  }
}
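
// Illustration only: older GCC releases lower "#pragma omp parallel" to this
// start/end pair, roughly as follows (the outlined function name and shared
// struct are assumptions):
//
//   struct shared_data s = {...};
//   GOMP_parallel_start(outlined_fn, &s, 0 /* 0 = use default num_threads */);
//   outlined_fn(&s);   // the encountering thread runs the region too
//   GOMP_parallel_end();
//
// Newer GCC releases call a combined GOMP_parallel() entry point instead.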

// Loop worksharing constructs

// The Gnu codegen passes in an exclusive upper bound for the overall range,
// but the libguide dispatch code expects an inclusive upper bound, hence the
// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the "ub - str" 11th
// argument to __kmp_GOMP_fork_call).
//
// Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
// but the Gnu codegen expects an exclusive upper bound, so the adjustment
// "*p_ub += stride" compensates for the discrepancy.
//
// Correction: the gnu codegen always adjusts the upper bound by +-1, not the
// stride value.  We adjust the dispatch parameters accordingly (by +-1), but
// we still adjust p_ub by the actual stride value.
//
// The "runtime" versions do not take a chunk_sz parameter.
//
// The profile lib cannot support construct checking of unordered loops that
// are predetermined by the compiler to be statically scheduled, as the gcc
// codegen will not always emit calls to GOMP_loop_static_next() to get the
// next iteration.  Instead, it emits inline code to call omp_get_thread_num()
// and calculate the iteration space using the result.  It doesn't do this
// with ordered static loops, so they can be checked.
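
// Worked example of the bound adjustment (illustration only): for
// "for (i = 0; i < 10; i += 3)" the codegen passes lb = 0, ub = 10
// (exclusive) and str = 3, so KMP_DISPATCH_INIT below is given ub - 1 = 9
// (inclusive).  If KMP_DISPATCH_NEXT hands a thread the chunk [0, 9] it
// stores *p_ub = 9, and the "+1" adjustment turns that back into the
// exclusive bound 10 that the generated loop test expects.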

#if OMPT_SUPPORT
#define IF_OMPT_SUPPORT(code) code
#else
#define IF_OMPT_SUPPORT(code)
#endif

#define LOOP_START(func, schedule)                                             \
  int func(long lb, long ub, long str, long chunk_sz, long *p_lb,              \
           long *p_ub) {                                                       \
    int status;                                                                \
    long stride;                                                               \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,        \
                        (schedule) != kmp_sch_static);                         \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }

#define LOOP_RUNTIME_START(func, schedule)                                     \
  int func(long lb, long ub, long str, long *p_lb, long *p_ub) {               \
    int status;                                                                \
    long stride;                                                               \
    long chunk_sz = 0;                                                         \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }

#if OMP_45_ENABLED
#define KMP_DOACROSS_FINI(status, gtid)                                        \
  if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) {     \
    __kmpc_doacross_fini(NULL, gtid);                                          \
  }
#else
#define KMP_DOACROSS_FINI(status, gtid) /* Nothing */
#endif

#define LOOP_NEXT(func, fini_code)                                             \
  int func(long *p_lb, long *p_ub) {                                           \
    int status;                                                                \
    long stride;                                                               \
    int gtid = __kmp_get_gtid();                                               \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid));                            \
                                                                               \
    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
    fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,    \
                                         (kmp_int *)p_ub, (kmp_int *)&stride); \
    if (status) {                                                              \
      *p_ub += (stride > 0) ? 1 : -1;                                          \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid)                                            \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
                       "returning %d\n",                                       \
         gtid, *p_lb, *p_ub, stride, status));                                 \
    return status;                                                             \
  }

LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
           kmp_sch_dynamic_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
           kmp_sch_guided_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
                   kmp_sch_runtime)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})

LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START),
           kmp_ord_static)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START),
           kmp_ord_dynamic_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START),
           kmp_ord_guided_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
LOOP_RUNTIME_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START),
    kmp_ord_runtime)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })

#if OMP_45_ENABLED
#define LOOP_DOACROSS_START(func, schedule)                                    \
  bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb,         \
            long *p_ub) {                                                      \
    int status;                                                                \
    long stride, lb, ub, str;                                                  \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \
                                "0x%lx, chunk_sz "                             \
                                "0x%lx\n",                                     \
                  gtid, ncounts, lb, ub, str, chunk_sz));                      \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,        \
                        (schedule) != kmp_sch_static);                         \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }

#define LOOP_DOACROSS_RUNTIME_START(func, schedule)                            \
  int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) {           \
    int status;                                                                \
    long stride, lb, ub, str;                                                  \
    long chunk_sz = 0;                                                         \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }

LOOP_DOACROSS_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START),
    kmp_sch_static)
LOOP_DOACROSS_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START),
    kmp_sch_dynamic_chunked)
LOOP_DOACROSS_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START),
    kmp_sch_guided_chunked)
LOOP_DOACROSS_RUNTIME_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START),
    kmp_sch_runtime)
#endif // OMP_45_ENABLED

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
}

void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) {
  KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
}
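
// Illustration only: a sketch of how the GCC codegen strings the loop entry
// points together for "#pragma omp for schedule(dynamic, 4)" over
// "for (i = lb; i < ub; i++)" (variable names are assumptions):
//
//   long chunk_lb, chunk_ub;
//   if (GOMP_loop_dynamic_start(lb, ub, 1, 4, &chunk_lb, &chunk_ub)) {
//     do {
//       for (long i = chunk_lb; i < chunk_ub; i++)
//         body(i);
//     } while (GOMP_loop_dynamic_next(&chunk_lb, &chunk_ub));
//   }
//   GOMP_loop_end();   // or GOMP_loop_end_nowait() with a nowait clause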

// Unsigned long long loop worksharing constructs
//
// These are new with gcc 4.4

#define LOOP_START_ULL(func, schedule)                                         \
  int func(int up, unsigned long long lb, unsigned long long ub,               \
           unsigned long long str, unsigned long long chunk_sz,                \
           unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    long long str2 = up ? ((long long)str) : -((long long)str);                \
    long long stride;                                                          \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str "    \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, up, lb, ub, str, chunk_sz));                           \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz,  \
                            (schedule) != kmp_sch_static);                     \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str2);                                      \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }

#define LOOP_RUNTIME_START_ULL(func, schedule)                                 \
  int func(int up, unsigned long long lb, unsigned long long ub,               \
           unsigned long long str, unsigned long long *p_lb,                   \
           unsigned long long *p_ub) {                                         \
    int status;                                                                \
    long long str2 = up ? ((long long)str) : -((long long)str);                \
    unsigned long long stride;                                                 \
    unsigned long long chunk_sz = 0;                                           \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str "    \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, up, lb, ub, str, chunk_sz));                           \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz,  \
                            TRUE);                                             \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT((long long)stride == str2);                           \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }

#define LOOP_NEXT_ULL(func, fini_code)                                         \
  int func(unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    long long stride;                                                          \
    int gtid = __kmp_get_gtid();                                               \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid));                            \
                                                                               \
    fini_code status =                                                         \
        KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,            \
                              (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);       \
    if (status) {                                                              \
      *p_ub += (stride > 0) ? 1 : -1;                                          \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, "  \
                   "returning %d\n",                                           \
         gtid, *p_lb, *p_ub, stride, status));                                 \
    return status;                                                             \
  }

LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START),
               kmp_sch_static)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START),
               kmp_sch_dynamic_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
               kmp_sch_guided_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
LOOP_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})

LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START),
               kmp_ord_static)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
LOOP_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START),
    kmp_ord_dynamic_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START),
               kmp_ord_guided_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
LOOP_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START),
    kmp_ord_runtime)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })

#if OMP_45_ENABLED
#define LOOP_DOACROSS_START_ULL(func, schedule)                                \
  int func(unsigned ncounts, unsigned long long *counts,                       \
           unsigned long long chunk_sz, unsigned long long *p_lb,              \
           unsigned long long *p_ub) {                                         \
    int status;                                                                \
    long long stride, str, lb, ub;                                             \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str "           \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,    \
                            (schedule) != kmp_sch_static);                     \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }

#define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule)                        \
  int func(unsigned ncounts, unsigned long long *counts,                       \
           unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    unsigned long long stride, str, lb, ub;                                    \
    unsigned long long chunk_sz = 0;                                           \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str "           \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,    \
                            TRUE);                                             \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }

LOOP_DOACROSS_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START),
    kmp_sch_static)
LOOP_DOACROSS_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START),
    kmp_sch_dynamic_chunked)
LOOP_DOACROSS_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START),
    kmp_sch_guided_chunked)
LOOP_DOACROSS_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START),
    kmp_sch_runtime)
#endif

// Combined parallel / loop worksharing constructs
//
// There are no ull versions (yet).

#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post)               \
  void func(void (*task)(void *), void *data, unsigned num_threads, long lb,   \
            long ub, long str, long chunk_sz) {                                \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    ompt_pre();                                                                \
                                                                               \
    if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {                       \
      if (num_threads != 0) {                                                  \
        __kmp_push_num_threads(&loc, gtid, num_threads);                       \
      }                                                                        \
      __kmp_GOMP_fork_call(&loc, gtid, task,                                   \
                           (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
                           9, task, data, num_threads, &loc, (schedule), lb,   \
                           (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);    \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                        \
    } else {                                                                   \
      __kmp_GOMP_serialized_parallel(&loc, gtid, task);                        \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                        \
    }                                                                          \
                                                                               \
    KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                              \
                      (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,          \
                      (schedule) != kmp_sch_static);                           \
                                                                               \
    ompt_post();                                                               \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid));                       \
  }
1074
1075 #if OMPT_SUPPORT && OMPT_OPTIONAL
1076
1077 #define OMPT_LOOP_PRE()                                                        \
1078   ompt_frame_t *parent_frame;                                                  \
1079   if (ompt_enabled.enabled) {                                                  \
1080     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);   \
1081     parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);                 \
1082     OMPT_STORE_RETURN_ADDRESS(gtid);                                           \
1083   }
1084
1085 #define OMPT_LOOP_POST()                                                       \
1086   if (ompt_enabled.enabled) {                                                  \
1087     parent_frame->enter_frame = ompt_data_none;                                \
1088   }
1089
1090 #else
1091
1092 #define OMPT_LOOP_PRE()
1093
1094 #define OMPT_LOOP_POST()
1095
1096 #endif
1097
1098 PARALLEL_LOOP_START(
1099     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
1100     kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1101 PARALLEL_LOOP_START(
1102     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
1103     kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1104 PARALLEL_LOOP_START(
1105     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
1106     kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1107 PARALLEL_LOOP_START(
1108     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
1109     kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
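// Illustrative sketch (not taken from any particular GCC version) of the call
// sequence a libgomp-style compiler emits against these GOMP_1.0 entry points
// for a combined construct such as
//   #pragma omp parallel for schedule(dynamic, 4)
//
//   GOMP_parallel_loop_dynamic_start(outlined_fn, &data, 0, lb, ub, 1, 4);
//   outlined_fn(&data);              // encountering thread participates too
//   GOMP_parallel_end();
//
//   // inside outlined_fn, every team member iterates:
//   long s, e;
//   while (GOMP_loop_dynamic_next(&s, &e)) {
//     for (long i = s; i < e; i++) { /* loop body */ }
//   }
//   GOMP_loop_end_nowait();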
1110
1111 // Tasking constructs
1112
1113 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
1114                                              void (*copy_func)(void *, void *),
1115                                              long arg_size, long arg_align,
1116                                              bool if_cond, unsigned gomp_flags
1117 #if OMP_40_ENABLED
1118                                              ,
1119                                              void **depend
1120 #endif
1121                                              ) {
1122   MKLOC(loc, "GOMP_task");
1123   int gtid = __kmp_entry_gtid();
1124   kmp_int32 flags = 0;
1125   kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1126
1127   KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));
1128
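  // In addition to the bits decoded just below, bit 3 of gomp_flags marks a
  // task with a depend clause; that case is handled further down, after the
  // task has been allocated.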
1129   // The low-order bit is the "untied" flag
1130   if (!(gomp_flags & 1)) {
1131     input_flags->tiedness = 1;
1132   }
1133   // The second low-order bit is the "final" flag
1134   if (gomp_flags & 2) {
1135     input_flags->final = 1;
1136   }
1137   input_flags->native = 1;
1138   // __kmp_task_alloc() sets up all other flags
1139
1140   if (!if_cond) {
1141     arg_size = 0;
1142   }
1143
1144   kmp_task_t *task = __kmp_task_alloc(
1145       &loc, gtid, input_flags, sizeof(kmp_task_t),
1146       arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func);
1147
1148   if (arg_size > 0) {
1149     if (arg_align > 0) {
1150       task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
1151                                arg_align * arg_align);
1152     }
1153     // else error??
1154
1155     if (copy_func) {
1156       (*copy_func)(task->shareds, data);
1157     } else {
1158       KMP_MEMCPY(task->shareds, data, arg_size);
1159     }
1160   }
1161
1162 #if OMPT_SUPPORT
1163   kmp_taskdata_t *current_task;
1164   if (ompt_enabled.enabled) {
1165     OMPT_STORE_RETURN_ADDRESS(gtid);
1166     current_task = __kmp_threads[gtid]->th.th_current_task;
1167     current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1168   }
1169 #endif
1170
1171   if (if_cond) {
1172 #if OMP_40_ENABLED
1173     if (gomp_flags & 8) {
1174       KMP_ASSERT(depend);
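      // Layout of the GOMP depend vector: depend[0] = total number of
      // dependences, depend[1] = how many of them are out/inout (these are
      // listed first), depend[2..] = the dependence addresses themselves.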
1175       const size_t ndeps = (kmp_intptr_t)depend[0];
1176       const size_t nout = (kmp_intptr_t)depend[1];
1177       kmp_depend_info_t dep_list[ndeps];
1178
1179       for (size_t i = 0U; i < ndeps; i++) {
1180         dep_list[i].base_addr = (kmp_intptr_t)depend[2U + i];
1181         dep_list[i].len = 0U;
1182         dep_list[i].flags.in = 1;
1183         dep_list[i].flags.out = (i < nout);
1184       }
1185       __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
1186     } else {
1187 #endif
1188       __kmpc_omp_task(&loc, gtid, task);
1189     }
1190   } else {
1191 #if OMPT_SUPPORT
1192     ompt_thread_info_t oldInfo;
1193     kmp_info_t *thread;
1194     kmp_taskdata_t *taskdata;
1195     if (ompt_enabled.enabled) {
1196       // Store the thread's state and restore it after the task
1197       thread = __kmp_threads[gtid];
1198       taskdata = KMP_TASK_TO_TASKDATA(task);
1199       oldInfo = thread->th.ompt_thread_info;
1200       thread->th.ompt_thread_info.wait_id = 0;
1201       thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1202       taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1203       OMPT_STORE_RETURN_ADDRESS(gtid);
1204     }
1205 #endif
1206
1207     __kmpc_omp_task_begin_if0(&loc, gtid, task);
1208     func(data);
1209     __kmpc_omp_task_complete_if0(&loc, gtid, task);
1210
1211 #if OMPT_SUPPORT
1212     if (ompt_enabled.enabled) {
1213       thread->th.ompt_thread_info = oldInfo;
1214       taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1215     }
1216 #endif
1217   }
1218 #if OMPT_SUPPORT
1219   if (ompt_enabled.enabled) {
1220     current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
1221   }
1222 #endif
1223
1224   KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
1225 }
1226
1227 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) {
1228   MKLOC(loc, "GOMP_taskwait");
1229   int gtid = __kmp_entry_gtid();
1230
1231 #if OMPT_SUPPORT
1232   if (ompt_enabled.enabled)
1233     OMPT_STORE_RETURN_ADDRESS(gtid);
1234 #endif
1235
1236   KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
1237
1238   __kmpc_omp_taskwait(&loc, gtid);
1239
1240   KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
1241 }
1242
1243 // Sections worksharing constructs
1244 //
1245 // For the sections construct, we initialize a dynamically scheduled loop
1246 // worksharing construct with lb 1 and stride 1, and use the iteration
1247 // numbers it returns as section ids.
1248 //
1249 // There are no special entry points for ordered sections, so we always use
1250 // the dynamically scheduled workshare, even if the sections aren't ordered.
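//
// Illustrative sketch (not from any particular compiler) of the GOMP protocol
// for "#pragma omp sections" with N section blocks:
//
//   unsigned id = GOMP_sections_start(N);
//   while (id) {
//     switch (id) { case 1: /* section 1 */ break; /* ... case N ... */ }
//     id = GOMP_sections_next();
//   }
//   GOMP_sections_end();              // or GOMP_sections_end_nowait()
//
// which is why a returned id of 0 below means "no more sections".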
1251
1252 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) {
1253   int status;
1254   kmp_int lb, ub, stride;
1255   int gtid = __kmp_entry_gtid();
1256   MKLOC(loc, "GOMP_sections_start");
1257   KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
1258
1259   KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1260
1261   status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1262   if (status) {
1263     KMP_DEBUG_ASSERT(stride == 1);
1264     KMP_DEBUG_ASSERT(lb > 0);
1265     KMP_ASSERT(lb == ub);
1266   } else {
1267     lb = 0;
1268   }
1269
1270   KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
1271                 (unsigned)lb));
1272   return (unsigned)lb;
1273 }
1274
1275 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) {
1276   int status;
1277   kmp_int lb, ub, stride;
1278   int gtid = __kmp_get_gtid();
1279   MKLOC(loc, "GOMP_sections_next");
1280   KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
1281
1282 #if OMPT_SUPPORT
1283   OMPT_STORE_RETURN_ADDRESS(gtid);
1284 #endif
1285
1286   status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1287   if (status) {
1288     KMP_DEBUG_ASSERT(stride == 1);
1289     KMP_DEBUG_ASSERT(lb > 0);
1290     KMP_ASSERT(lb == ub);
1291   } else {
1292     lb = 0;
1293   }
1294
1295   KA_TRACE(
1296       20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb));
1297   return (unsigned)lb;
1298 }
1299
1300 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
1301     void (*task)(void *), void *data, unsigned num_threads, unsigned count) {
1302   int gtid = __kmp_entry_gtid();
1303
1304 #if OMPT_SUPPORT
1305   ompt_frame_t *parent_frame;
1306
1307   if (ompt_enabled.enabled) {
1308     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
1309     parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1310     OMPT_STORE_RETURN_ADDRESS(gtid);
1311   }
1312 #endif
1313
1314   MKLOC(loc, "GOMP_parallel_sections_start");
1315   KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
1316
1317   if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
1318     if (num_threads != 0) {
1319       __kmp_push_num_threads(&loc, gtid, num_threads);
1320     }
1321     __kmp_GOMP_fork_call(&loc, gtid, task,
1322                          (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1323                          task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1324                          (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1325   } else {
1326     __kmp_GOMP_serialized_parallel(&loc, gtid, task);
1327   }
1328
1329 #if OMPT_SUPPORT
1330   if (ompt_enabled.enabled) {
1331     parent_frame->enter_frame = ompt_data_none;
1332   }
1333 #endif
1334
1335   KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1336
1337   KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
1338 }
1339
1340 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
1341   int gtid = __kmp_get_gtid();
1342   KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
1343
1344 #if OMPT_SUPPORT
1345   ompt_frame_t *ompt_frame;
1346   if (ompt_enabled.enabled) {
1347     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1348     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1349     OMPT_STORE_RETURN_ADDRESS(gtid);
1350   }
1351 #endif
1352   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1353 #if OMPT_SUPPORT
1354   if (ompt_enabled.enabled) {
1355     ompt_frame->enter_frame = ompt_data_none;
1356   }
1357 #endif
1358
1359   KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
1360 }
1361
1362 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) {
1363   KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
1364 }
1365
1366 // libgomp has an empty function for GOMP_taskyield as of 2013-10-10
1367 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) {
1368   KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))
1369   return;
1370 }
1371
1372 #if OMP_40_ENABLED // these are new GOMP_4.0 entry points
1373
1374 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
1375                                                  void *data,
1376                                                  unsigned num_threads,
1377                                                  unsigned int flags) {
1378   int gtid = __kmp_entry_gtid();
1379   MKLOC(loc, "GOMP_parallel");
1380   KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));
1381
1382 #if OMPT_SUPPORT
1383   ompt_task_info_t *parent_task_info, *task_info;
1384   if (ompt_enabled.enabled) {
1385     parent_task_info = __ompt_get_task_info_object(0);
1386     parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1387     OMPT_STORE_RETURN_ADDRESS(gtid);
1388   }
1389 #endif
1390   if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
1391     if (num_threads != 0) {
1392       __kmp_push_num_threads(&loc, gtid, num_threads);
1393     }
1394     if (flags != 0) {
1395       __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
1396     }
1397     __kmp_GOMP_fork_call(&loc, gtid, task,
1398                          (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
1399                          data);
1400   } else {
1401     __kmp_GOMP_serialized_parallel(&loc, gtid, task);
1402   }
1403 #if OMPT_SUPPORT
1404   if (ompt_enabled.enabled) {
1405     task_info = __ompt_get_task_info_object(0);
1406     task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1407   }
1408 #endif
1409   task(data);
1410 #if OMPT_SUPPORT
1411   if (ompt_enabled.enabled) {
1412     OMPT_STORE_RETURN_ADDRESS(gtid);
1413   }
1414 #endif
1415   KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
1416 #if OMPT_SUPPORT
1417   if (ompt_enabled.enabled) {
1418     task_info->frame.exit_frame = ompt_data_none;
1419     parent_task_info->frame.enter_frame = ompt_data_none;
1420   }
1421 #endif
1422 }
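// Unlike the GOMP_1.0 GOMP_parallel_start/GOMP_parallel_end pair, the GOMP_4.0
// GOMP_parallel entry above is self-contained: it forks the team, runs the
// outlined function on the encountering thread as well (the task(data) call),
// and joins via GOMP_parallel_end before returning. The flags argument carries
// the proc-bind policy, forwarded through __kmp_push_proc_bind.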
1423
1424 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
1425                                                           void *data,
1426                                                           unsigned num_threads,
1427                                                           unsigned count,
1428                                                           unsigned flags) {
1429   int gtid = __kmp_entry_gtid();
1430   MKLOC(loc, "GOMP_parallel_sections");
1431   KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
1432
1433 #if OMPT_SUPPORT
1434   OMPT_STORE_RETURN_ADDRESS(gtid);
1435 #endif
1436
1437   if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
1438     if (num_threads != 0) {
1439       __kmp_push_num_threads(&loc, gtid, num_threads);
1440     }
1441     if (flags != 0) {
1442       __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
1443     }
1444     __kmp_GOMP_fork_call(&loc, gtid, task,
1445                          (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1446                          task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1447                          (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1448   } else {
1449     __kmp_GOMP_serialized_parallel(&loc, gtid, task);
1450   }
1451
1452 #if OMPT_SUPPORT
1453   OMPT_STORE_RETURN_ADDRESS(gtid);
1454 #endif
1455
1456   KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1457
1458   task(data);
1459   KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
1460   KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
1461 }
1462
1463 #define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post)                     \
1464   void func(void (*task)(void *), void *data, unsigned num_threads, long lb,   \
1465             long ub, long str, long chunk_sz, unsigned flags) {                \
1466     int gtid = __kmp_entry_gtid();                                             \
1467     MKLOC(loc, KMP_STR(func));                                                 \
1468     KA_TRACE(                                                                  \
1469         20,                                                                    \
1470         (KMP_STR(                                                              \
1471              func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
1472          gtid, lb, ub, str, chunk_sz));                                        \
1473                                                                                \
1474     ompt_pre();                                                                \
1475     if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {                       \
1476       if (num_threads != 0) {                                                  \
1477         __kmp_push_num_threads(&loc, gtid, num_threads);                       \
1478       }                                                                        \
1479       if (flags != 0) {                                                        \
1480         __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);              \
1481       }                                                                        \
1482       __kmp_GOMP_fork_call(&loc, gtid, task,                                   \
1483                            (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1484                            9, task, data, num_threads, &loc, (schedule), lb,   \
1485                            (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);    \
1486     } else {                                                                   \
1487       __kmp_GOMP_serialized_parallel(&loc, gtid, task);                        \
1488     }                                                                          \
1489                                                                                \
1490     IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
1491     KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                              \
1492                       (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,          \
1493                       (schedule) != kmp_sch_static);                           \
1494     task(data);                                                                \
1495     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();                         \
1496     ompt_post();                                                               \
1497                                                                                \
1498     KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid));                       \
1499   }
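// PARALLEL_LOOP differs from PARALLEL_LOOP_START above in that it implements
// the self-contained GOMP_4.0 combined entry points: it additionally accepts
// the proc-bind flags, executes the outlined function on the encountering
// thread (task(data)), and closes the region itself with GOMP_parallel_end,
// whereas the *_START variants return to compiler-generated code that does
// that part.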
1500
1501 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
1502               kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1503 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
1504               kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1505 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
1506               kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1507 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
1508               kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1509
1510 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
1511   int gtid = __kmp_entry_gtid();
1512   MKLOC(loc, "GOMP_taskgroup_start");
1513   KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
1514
1515 #if OMPT_SUPPORT
1516   if (ompt_enabled.enabled)
1517     OMPT_STORE_RETURN_ADDRESS(gtid);
1518 #endif
1519
1520   __kmpc_taskgroup(&loc, gtid);
1521
1522   return;
1523 }
1524
1525 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {
1526   int gtid = __kmp_get_gtid();
1527   MKLOC(loc, "GOMP_taskgroup_end");
1528   KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
1529
1530 #if OMPT_SUPPORT
1531   if (ompt_enabled.enabled)
1532     OMPT_STORE_RETURN_ADDRESS(gtid);
1533 #endif
1534
1535   __kmpc_end_taskgroup(&loc, gtid);
1536
1537   return;
1538 }
1539
1540 #ifndef KMP_DEBUG
1541 static
1542 #endif /* KMP_DEBUG */
1543     kmp_int32
1544     __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {
1545   kmp_int32 cncl_kind = 0;
1546   switch (gomp_kind) {
1547   case 1:
1548     cncl_kind = cancel_parallel;
1549     break;
1550   case 2:
1551     cncl_kind = cancel_loop;
1552     break;
1553   case 4:
1554     cncl_kind = cancel_sections;
1555     break;
1556   case 8:
1557     cncl_kind = cancel_taskgroup;
1558     break;
1559   }
1560   return cncl_kind;
1561 }
1562
1563 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) {
1564   if (__kmp_omp_cancellation) {
1565     KMP_FATAL(NoGompCancellation);
1566   }
1567   int gtid = __kmp_get_gtid();
1568   MKLOC(loc, "GOMP_cancellation_point");
1569   KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid));
1570
1571   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1572
1573   return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1574 }
1575
1576 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) {
1577   if (__kmp_omp_cancellation) {
1578     KMP_FATAL(NoGompCancellation);
1579   }
1581   int gtid = __kmp_get_gtid();
1582   MKLOC(loc, "GOMP_barrier_cancel");
1583   KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));
1584
1585   return __kmpc_cancel_barrier(&loc, gtid);
1586 }
1587
1588 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) {
1589   if (__kmp_omp_cancellation) {
1590     KMP_FATAL(NoGompCancellation);
1591   } else {
1592     return FALSE;
1593   }
1594
1595   int gtid = __kmp_get_gtid();
1596   MKLOC(loc, "GOMP_cancel");
1597   KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid));
1598
1599   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1600
1601   if (do_cancel == FALSE) {
1602     return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which);
1603   } else {
1604     return __kmpc_cancel(&loc, gtid, cncl_kind);
1605   }
1606 }
1607
1608 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) {
1609   if (__kmp_omp_cancellation) {
1610     KMP_FATAL(NoGompCancellation);
1611   }
1612   int gtid = __kmp_get_gtid();
1613   MKLOC(loc, "GOMP_sections_end_cancel");
1614   KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));
1615
1616   return __kmpc_cancel_barrier(&loc, gtid);
1617 }
1618
1619 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) {
1620   if (__kmp_omp_cancellation) {
1621     KMP_FATAL(NoGompCancellation);
1622   }
1623   int gtid = __kmp_get_gtid();
1624   MKLOC(loc, "GOMP_loop_end_cancel");
1625   KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));
1626
1627   return __kmpc_cancel_barrier(&loc, gtid);
1628 }
1629
1630 // All target functions are empty as of 2014-05-29
1631 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *),
1632                                                const void *openmp_target,
1633                                                size_t mapnum, void **hostaddrs,
1634                                                size_t *sizes,
1635                                                unsigned char *kinds) {
1636   return;
1637 }
1638
1639 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)(
1640     int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
1641     size_t *sizes, unsigned char *kinds) {
1642   return;
1643 }
1644
1645 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; }
1646
1647 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)(
1648     int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
1649     size_t *sizes, unsigned char *kinds) {
1650   return;
1651 }
1652
1653 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams,
1654                                               unsigned int thread_limit) {
1655   return;
1656 }
1657 #endif // OMP_40_ENABLED
1658
1659 #if OMP_45_ENABLED
1660
1661 // Task duplication function which copies src to dest (both are
1662 // preallocated task structures)
1663 static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src,
1664                                 kmp_int32 last_private) {
1665   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src);
1666   if (taskdata->td_copy_func) {
1667     (taskdata->td_copy_func)(dest->shareds, src->shareds);
1668   }
1669 }
1670
1671 #ifdef __cplusplus
1672 } // extern "C"
1673 #endif
1674
1675 template <typename T>
1676 void __GOMP_taskloop(void (*func)(void *), void *data,
1677                      void (*copy_func)(void *, void *), long arg_size,
1678                      long arg_align, unsigned gomp_flags,
1679                      unsigned long num_tasks, int priority, T start, T end,
1680                      T step) {
1681   typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
1682   MKLOC(loc, "GOMP_taskloop");
1683   int sched;
1684   T *loop_bounds;
1685   int gtid = __kmp_entry_gtid();
1686   kmp_int32 flags = 0;
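  // Bits of gomp_flags decoded in this function: bit 0 = untied, bit 1 =
  // final, bit 8 = the step is positive ("up"), bit 9 = grainsize (rather
  // than num_tasks) was specified, bit 10 = the if clause value, bit 11 =
  // nogroup.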
1687   int if_val = gomp_flags & (1u << 10);
1688   int nogroup = gomp_flags & (1u << 11);
1689   int up = gomp_flags & (1u << 8);
1690   p_task_dup_t task_dup = NULL;
1691   kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1692 #ifdef KMP_DEBUG
1693   {
1694     char *buff;
1695     buff = __kmp_str_format(
1696         "GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p "
1697         "arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu "
1698         "priority:%%d start:%%%s end:%%%s step:%%%s\n",
1699         traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
1700     KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align,
1701                   gomp_flags, num_tasks, priority, start, end, step));
1702     __kmp_str_free(&buff);
1703   }
1704 #endif
1705   KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T));
1706   KMP_ASSERT(arg_align > 0);
1707   // The low-order bit is the "untied" flag
1708   if (!(gomp_flags & 1)) {
1709     input_flags->tiedness = 1;
1710   }
1711   // The second low-order bit is the "final" flag
1712   if (gomp_flags & 2) {
1713     input_flags->final = 1;
1714   }
1715   // Negative step flag
1716   if (!up) {
1717     // If the step is flagged as negative but isn't properly sign-extended,
1718     // manually sign-extend it.  It could be a short, int, or char embedded
1719     // in a long, so no particular cast can be assumed.
1720     if (step > 0) {
1721       for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) {
1722         // break at the first 1 bit
1723         if (step & ((T)1 << i))
1724           break;
1725         step |= ((T)1 << i);
1726       }
1727     }
1728   }
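  // Worked example of the sign extension above: assume T is a 64-bit unsigned
  // long long carrying a 32-bit step of -1. The value arrives as
  // 0x00000000FFFFFFFF (> 0); the loop scans down from bit 63, setting each
  // zero bit until it reaches the first 1 bit (bit 31), leaving
  // 0xFFFFFFFFFFFFFFFF, i.e. -1 properly extended to the full width.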
1729   input_flags->native = 1;
1730   // Figure out whether a grainsize or num_tasks clause was specified (or neither)
1731   if (num_tasks > 0) {
1732     if (gomp_flags & (1u << 9))
1733       sched = 1; // grainsize specified
1734     else
1735       sched = 2; // num_tasks specified
1736   } else {
1737     // neither grainsize nor num_tasks specified
1738     sched = 0;
1739   }
1740
1741   // __kmp_task_alloc() sets up all other flags
1742   kmp_task_t *task =
1743       __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t),
1744                        arg_size + arg_align - 1, (kmp_routine_entry_t)func);
1745   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1746   taskdata->td_copy_func = copy_func;
1747   taskdata->td_size_loop_bounds = sizeof(T);
1748
1749   // re-align shareds if needed and set up firstprivate copy constructors
1750   // through the task_dup mechanism
1751   task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
1752                            arg_align * arg_align);
1753   if (copy_func) {
1754     task_dup = __kmp_gomp_task_dup;
1755   }
1756   KMP_MEMCPY(task->shareds, data, arg_size);
1757
1758   loop_bounds = (T *)task->shareds;
1759   loop_bounds[0] = start;
1760   loop_bounds[1] = end + (up ? -1 : 1);
1761   __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
1762                   (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
1763                   sched, (kmp_uint64)num_tasks, (void *)task_dup);
1764 }
1765
1766 // 4 byte version of GOMP_doacross_post
1767 // This version needs to create a temporary array which converts 4 byte
1768 // integers into 8 byte integers
1769 template <typename T, bool need_conversion = (sizeof(long) == 4)>
1770 void __kmp_GOMP_doacross_post(T *count);
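// need_conversion is resolved at compile time from the target data model: on
// ILP32 targets long is 4 bytes, so the counts must be widened into a
// temporary kmp_int64 vector (first specialization below); on LP64 targets
// long is already 8 bytes and the array can be passed through unchanged.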
1771
1772 template <> void __kmp_GOMP_doacross_post<long, true>(long *count) {
1773   int gtid = __kmp_entry_gtid();
1774   kmp_info_t *th = __kmp_threads[gtid];
1775   MKLOC(loc, "GOMP_doacross_post");
1776   kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1777   kmp_int64 *vec =
1778       (kmp_int64 *)__kmp_thread_malloc(th, sizeof(kmp_int64) * num_dims);
1779   for (kmp_int64 i = 0; i < num_dims; ++i) {
1780     vec[i] = (kmp_int64)count[i];
1781   }
1782   __kmpc_doacross_post(&loc, gtid, vec);
1783   __kmp_thread_free(th, vec);
1784 }
1785
1786 // 8 byte version of GOMP_doacross_post
1787 // This version can just pass in the count array directly instead of creating
1788 // a temporary array
1789 template <> void __kmp_GOMP_doacross_post<long, false>(long *count) {
1790   int gtid = __kmp_entry_gtid();
1791   MKLOC(loc, "GOMP_doacross_post");
1792   __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1793 }
1794
1795 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
1796   int gtid = __kmp_entry_gtid();
1797   kmp_info_t *th = __kmp_threads[gtid];
1798   MKLOC(loc, "GOMP_doacross_wait");
1799   kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1800   kmp_int64 *vec =
1801       (kmp_int64 *)__kmp_thread_malloc(th, sizeof(kmp_int64) * num_dims);
1802   vec[0] = (kmp_int64)first;
1803   for (kmp_int64 i = 1; i < num_dims; ++i) {
1804     T item = va_arg(args, T);
1805     vec[i] = (kmp_int64)item;
1806   }
1807   __kmpc_doacross_wait(&loc, gtid, vec);
1808   __kmp_thread_free(th, vec);
1809   return;
1810 }
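// These helpers back the GOMP_doacross_post/GOMP_doacross_wait entry points
// below. As an illustration (the exact codegen depends on the compiler), a
// libgomp-style compiler emits roughly:
//   #pragma omp ordered depend(source)   ->  GOMP_doacross_post(counts)
//   #pragma omp ordered depend(sink: v)  ->  GOMP_doacross_wait(v0, v1, ...)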
1811
1812 #ifdef __cplusplus
1813 extern "C" {
1814 #endif // __cplusplus
1815
1816 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)(
1817     void (*func)(void *), void *data, void (*copy_func)(void *, void *),
1818     long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
1819     int priority, long start, long end, long step) {
1820   __GOMP_taskloop<long>(func, data, copy_func, arg_size, arg_align, gomp_flags,
1821                         num_tasks, priority, start, end, step);
1822 }
1823
1824 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)(
1825     void (*func)(void *), void *data, void (*copy_func)(void *, void *),
1826     long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
1827     int priority, unsigned long long start, unsigned long long end,
1828     unsigned long long step) {
1829   __GOMP_taskloop<unsigned long long>(func, data, copy_func, arg_size,
1830                                       arg_align, gomp_flags, num_tasks,
1831                                       priority, start, end, step);
1832 }
1833
1834 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) {
1835   __kmp_GOMP_doacross_post(count);
1836 }
1837
1838 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) {
1839   va_list args;
1840   va_start(args, first);
1841   __kmp_GOMP_doacross_wait<long>(first, args);
1842   va_end(args);
1843 }
1844
1845 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)(
1846     unsigned long long *count) {
1847   int gtid = __kmp_entry_gtid();
1848   MKLOC(loc, "GOMP_doacross_ull_post");
1849   __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1850 }
1851
1852 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
1853     unsigned long long first, ...) {
1854   va_list args;
1855   va_start(args, first);
1856   __kmp_GOMP_doacross_wait<unsigned long long>(first, args);
1857   va_end(args);
1858 }
1859
1860 #endif // OMP_45_ENABLED
1861
1862 /* The following sections of code create aliases for the GOMP_* functions and
1863    then create versioned symbols using the assembler directive .symver. This is
1864    only pertinent for an ELF shared library (.so). The KMP_VERSION_SYMBOL macro
1865    is defined in kmp_os.h. */
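//
// Rough sketch of what a use such as
//   KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
// achieves. This is an approximation, not the actual macro -- see kmp_os.h
// for the real definition:
//
//   // declare an alias for the implementation symbol ...
//   __typeof__(GOMP_barrier) GOMP_barrier_10_alias
//       __attribute__((alias("GOMP_barrier")));
//   // ... and bind that alias to the GOMP_1.0 version node
//   __asm__(".symver GOMP_barrier_10_alias, GOMP_barrier@GOMP_1.0");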
1866
1867 #ifdef KMP_USE_VERSION_SYMBOLS
1868 // GOMP_1.0 versioned symbols
1869 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
1870 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
1871 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
1872 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
1873 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
1874 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
1875 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
1876 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
1877 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
1878 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
1879 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
1880 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
1881 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
1882 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
1883 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10,
1884                    "GOMP_1.0");
1885 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
1886 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
1887 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
1888 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10,
1889                    "GOMP_1.0");
1890 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
1891 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
1892 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
1893 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
1894 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
1895 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
1896 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
1897 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
1898 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
1899 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10,
1900                    "GOMP_1.0");
1901 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10,
1902                    "GOMP_1.0");
1903 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10,
1904                    "GOMP_1.0");
1905 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10,
1906                    "GOMP_1.0");
1907 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
1908 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
1909 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
1910 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
1911 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
1912 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
1913 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
1914 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
1915 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
1916
1917 // GOMP_2.0 versioned symbols
1918 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
1919 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
1920 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
1921 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
1922 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
1923 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
1924 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20,
1925                    "GOMP_2.0");
1926 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20,
1927                    "GOMP_2.0");
1928 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20,
1929                    "GOMP_2.0");
1930 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20,
1931                    "GOMP_2.0");
1932 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20,
1933                    "GOMP_2.0");
1934 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20,
1935                    "GOMP_2.0");
1936 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20,
1937                    "GOMP_2.0");
1938 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20,
1939                    "GOMP_2.0");
1940 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
1941 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
1942 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
1943 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
1944
1945 // GOMP_3.0 versioned symbols
1946 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
1947
1948 // GOMP_4.0 versioned symbols
1949 #if OMP_40_ENABLED
1950 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");
1951 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");
1952 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");
1953 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");
1954 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");
1955 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");
1956 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");
1957 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");
1958 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");
1959 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");
1960 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");
1961 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");
1962 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");
1963 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");
1964 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");
1965 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");
1966 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");
1967 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");
1968 #endif
1969
1970 // GOMP_4.5 versioned symbols
1971 #if OMP_45_ENABLED
1972 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5");
1973 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5");
1974 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5");
1975 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5");
1976 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45,
1977                    "GOMP_4.5");
1978 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45,
1979                    "GOMP_4.5");
1980 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45,
1981                    "GOMP_4.5");
1982 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45,
1983                    "GOMP_4.5");
1984 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5");
1985 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5");
1986 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45,
1987                    "GOMP_4.5");
1988 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45,
1989                    "GOMP_4.5");
1990 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
1991                    "GOMP_4.5");
1992 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
1993                    "GOMP_4.5");
1994 #endif
1995
1996 #endif // KMP_USE_VERSION_SYMBOLS
1997
1998 #ifdef __cplusplus
1999 } // extern "C"
2000 #endif // __cplusplus