/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest-level thread
synchronizations of suspending a thread and waking it. They are used to build
higher-level operations such as barriers and fork/join.
*/
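/* The protocol in brief (illustrative sketch, not normative; the bit layout
   comes from the KMP_BARRIER_* macros in kmp.h): a waiter spins until the
   flag reaches a "checker" value, and a releaser advances the flag and wakes
   the waiter if the waiter parked itself first. The 32/64-bit flags advance
   in steps of KMP_BARRIER_STATE_BUMP so the low bits stay free for status:

     b_go = 0;                        // waiter not yet released
     b_go |= KMP_BARRIER_SLEEP_STATE; // waiter: announce intent to suspend
     old = fetch_and_add(&b_go, KMP_BARRIER_STATE_BUMP); // releaser: bump
     // releaser: old value still carries the sleep bit -> resume() the waiter
*/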
/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc; /**< Pointer to the flag storage that is modified by
                        another thread. */
  flag_type t; /**< "Type" of the flag in loc */

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};
/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained);
  */
};
#if OMPT_SUPPORT
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
   calls __kmp_wait_* must ensure that another thread calls __kmp_release to
   wake it back up; otherwise the wait can deadlock.

   NOTE: We may not belong to a team at this point. */
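/* Illustrative sketch only (portable C++, not the runtime's actual code):
   the same three-stage back-off ladder in miniature. KMP_CPU_PAUSE, the
   KMP_YIELD* macros, and flag->suspend() below play the roles of the
   busy-spin, yield(), and condition-variable sleep here.

     #include <atomic>
     #include <condition_variable>
     #include <mutex>
     #include <thread>

     // Three-stage back-off: busy-spin, then yield, then sleep on a CV.
     void spin_wait(std::atomic<int> &flag, std::mutex &m,
                    std::condition_variable &cv) {
       int spins = 0;
       while (flag.load(std::memory_order_acquire) == 0) {
         if (spins < 1000) { // stage 1: cheap busy-spin ("pause")
           ++spins;
         } else if (spins < 2000) { // stage 2: give up the time slice
           ++spins;
           std::this_thread::yield();
         } else { // stage 3: block until the releaser notifies
           std::unique_lock<std::mutex> lk(m);
           cv.wait(lk, [&] { return flag.load() != 0; });
         }
       }
     }
     // Releaser counterpart: set the flag, then notify under the same mutex:
     // { std::lock_guard<std::mutex> lk(m); flag.store(1); } cv.notify_all();
*/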
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KF_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
  /* OMPT state bookkeeping. THIS function is called from:

     1) __kmp_barrier (2 times) (implicit or explicit barrier in parallel
        regions): these have join / fork behavior. In these cases, we don't
        change the state or trigger events in THIS function.
        Events are triggered in the calling code (__kmp_barrier):
          state := ompt_state_overhead
          state := ompt_state_wait_barrier
          call join-barrier-implementation (finally arrive here)
          call fork-barrier-implementation (finally arrive here)
          state := ompt_state_overhead
          state := ompt_state_work_parallel

     2) __kmp_fork_barrier (after thread creation, before executing the
        implicit task):
          call fork-barrier-implementation (finally arrive here)
          {} // worker arrives here with state = ompt_state_idle

     3) __kmp_join_barrier (implicit barrier at end of parallel region):
          state := ompt_state_barrier_implicit
          call join-barrier-implementation (finally arrive here,
                                            final_spin=FALSE)

     4) __kmp_fork_barrier (implicit barrier at end of parallel region):
          call fork-barrier-implementation (finally arrive here,
                                            final_spin=TRUE)

     Worker after the task-team is finished:
       state := ompt_state_idle

     Before leaving, if state = ompt_state_idle:
       state := ompt_state_overhead */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif
  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was
         manually disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if
    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    // TODO: Should it be number of cores instead of thread contexts? Like:
    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
    // Need performance improvement data to make the change...
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }
    // Check if this thread was transferred from a team
    // to the thread pool (or vice-versa) while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // Recently transferred from team to pool
        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
        /* Here, we cannot assert that:
           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
           __kmp_thread_pool_nth);
           __kmp_thread_pool_nth is inc/dec'd by the master thread while the
           fork/join lock is held, whereas __kmp_thread_pool_active_nth is
           inc/dec'd asynchronously by the workers. The two can get out of sync
           for brief periods of time. */
      } else { // Recently transferred from pool to team
        KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }
#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  } // while
#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}
/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
                 flag->get(), flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
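/* Usage sketch (illustrative only; set_sleep_bit is a hypothetical helper,
   not part of this header): flag_traits lets the generic flag classes below
   pick the width-correct primitive at compile time, so a single template
   resolves to KMP_TEST_THEN_OR32 or KMP_TEST_THEN_OR64 with no runtime
   dispatch:

     template <typename FlagType>
     FlagType set_sleep_bit(volatile FlagType *f) {
       return flag_traits<FlagType>::test_then_or(f, KMP_BARRIER_SLEEP_STATE);
     }
*/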
// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check if flag has
                         been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check if flag has
                         been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
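/* Example (illustrative sketch modeled on how the barrier code in
   kmp_barrier.cpp drives this class; this_thr, other_thr, thr_bar, bt, and
   itt_sync_obj are assumed context, not defined here):

     // Waiter: spin, run tasks, and eventually sleep until b_go is bumped.
     kmp_flag_64 flag(&thr_bar->b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

     // Releaser: bump the peer's b_go and resume it if its sleep bit is set.
     kmp_flag_64 flag(&other_thr->th.th_bar[bt].bb.b_go, other_thr);
     flag.release();
*/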
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                             location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
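/* Layout note (illustrative sketch; `word` is local to the example): the
   on-core flag packs one byte per thread into a single kmp_uint64, so a
   parent can observe or release up to eight threads sharing a core through
   one 64-bit word while each waiter inspects only its own byte via byteref():

     kmp_uint64 word = 0;             // shared b_arrived-style word
     ((unsigned char *)&word)[2] = 1; // what internal_release() does for a
                                      // flag constructed with offset == 2
     // done_check() for offset 2 now compares byteref(&word, 2) to checker
*/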
// Used to wake up threads; the volatile void *flag is usually the
// th_sleep_loc field associated with the given gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H