/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and waking it. They are used to build
higher level operations such as barriers and fork/join.
*/
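/*
Illustrative sketch (not part of the runtime): the idiom the classes below
implement. A waiter spins on a flag and eventually suspends on an OS
primitive; a releaser stores the release value and wakes any sleeper. All
names here are generic placeholders, not kmp internals:

  std::atomic<kmp_uint64> go(0);

  void waiter() {                 // e.g. a worker parked in a barrier
    while (go.load(std::memory_order_acquire) == 0) {
      // spin a while, yield, then block (futex / condition variable)
    }
  }
  void releaser() {               // e.g. the last thread to arrive
    go.store(1, std::memory_order_release); // "release" the flag
    // then wake any thread that suspended on it
  }
*/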
/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc; /**< Pointer to the flag storage that is modified by
                      another thread */
  flag_type t; /**< "Type" of the flag in loc */

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};
/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
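/*
Minimal sketch of the derived-class contract above (illustrative only): a
flag counts as "done" once it reaches a known checker value. kmp_basic_flag
below is the real implementation; this only shows the shape of the interface:

  class my_flag : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;
  public:
    my_flag(std::atomic<kmp_uint64> *p, kmp_uint64 c)
        : kmp_flag<kmp_uint64>(p, flag64), checker(c) {}
    bool done_check() { return this->load() == checker; }
    bool notdone_check() { return this->load() != checker; }
  };
*/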
#if OMPT_SUPPORT
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif
/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
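/*
Pairing sketch (illustrative): every __kmp_wait_* on a flag must be matched
by a __kmp_release on the same flag from some other thread, or the waiter can
sleep forever. With the typed wrappers defined later in this header ("goal"
is a placeholder release value, not a runtime symbol):

  // waiter
  kmp_flag_64 flag(&thr->th.th_bar[bs_forkjoin_barrier].bb.b_go, goal);
  flag.wait(thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));

  // releaser, on another thread
  kmp_flag_64 flag(&thr->th.th_bar[bs_forkjoin_barrier].bb.b_go, goal);
  flag.release();
*/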
template <class C, int final_spin, bool cancellable = false,
          bool sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
  /* OMPT Behavior:
  THIS function is called from
    __kmp_barrier (2 times)  (implicit or explicit barrier in parallel regions)
          these have join / fork behavior

      In these cases, we don't change the state or trigger events in THIS
      function. Events are triggered in the calling code (__kmp_barrier):

          state := ompt_state_overhead
          state := ompt_state_wait_barrier
          call join-barrier-implementation (finally arrive here)
          {}
          call fork-barrier-implementation (finally arrive here)
          {}
          state := ompt_state_overhead
          state := ompt_state_work_parallel

    __kmp_fork_barrier (after thread creation, before executing implicit task)
          call fork-barrier-implementation (finally arrive here)
          {} // worker arrives here with state = ompt_state_idle

    __kmp_join_barrier (implicit barrier at end of parallel region)
          state := ompt_state_barrier_implicit
          call join-barrier-implementation (finally arrive here
                                            final_spin=FALSE)

    __kmp_fork_barrier (implicit barrier at end of parallel region)
          call fork-barrier-implementation (finally arrive here
                                            final_spin=TRUE)

      Worker after task-team is finished:
          state := ompt_state_idle

      Before leaving, if state = ompt_state_idle
          state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif
  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }
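  // Illustrative arithmetic (assumed numbers, not fixed constants): with
  // KMP_BLOCKTIME=200ms and a monitor tick of ~10ms, th_team_bt_intervals is
  // about 20, so suspension is allowed only once the global tick counter
  // passes "now" by ~21 ticks. Without the monitor, hibernate_goal plays the
  // same role measured directly in KMP_NOW() time units.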
  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was
         manually disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      }
    }
    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }
#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}
/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting threads if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
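/*
Typical call site (illustrative sketch, not a verbatim excerpt): the typed
flag classes below forward their release() here, so waking a worker parked on
its go-flag looks like ("goal" is a placeholder release value):

  kmp_flag_64 flag(&waiter_thr->th.th_bar[bs_plain_barrier].bb.b_go, goal);
  flag.release(); // bump the flag; resume the waiter if its sleep bit is set
*/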
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
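/*
Illustrative use of the traits layer (a sketch; internal_release() and
set_sleeping() below wrap exactly these calls). The +4 release bump assumes
the kmp.h barrier-state layout, where the two low bits of the flag word are
status bits (sleep/unused) and the count proper starts at bit 2:

  volatile kmp_uint64 f64 = 0;
  flag_traits<kmp_uint64>::test_then_add4(&f64); // atomically f64 += 4
  flag_traits<kmp_uint64>::test_then_or(&f64, KMP_BARRIER_SLEEP_STATE);
  flag_traits<kmp_uint64>::test_then_and(&f64, ~KMP_BARRIER_SLEEP_STATE);

The same code compiles for kmp_uint32 through flag_traits<kmp_uint32>, which
is why the kmp_basic_flag* templates below are written once per storage kind
rather than once per width.
*/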
// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check whether it
                       has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
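/*
Sleep-bit protocol sketch (illustrative): the sleep state is OR'ed into the
flag word itself, so one word carries both the progress value and the mark
"a waiter may be asleep here". A suspend path built on this class would look
roughly like (compare the __kmp_suspend_* routines defined elsewhere):

  FlagType old = flag.set_sleeping();  // value before the OR
  if (flag.done_check_val(old)) {
    flag.unset_sleeping();             // released while registering: no sleep
  } else {
    // safe to block; a releaser sees the bit and calls resume()
  }
*/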
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check whether it
                       has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};
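/*
Usage sketch (illustrative; "go" and "goal" are placeholders): a 32-bit flag
waits until the pointed-to word reaches the checker value, executing tasks
while it spins; release() bumps the value by 4 (one "generation" in the
barrier-state encoding), so goal is typically the current value plus the bump:

  std::atomic<kmp_uint32> go(0);

  kmp_flag_32 wflag(&go, (kmp_uint32)goal); // waiting thread
  wflag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));

  kmp_flag_32 rflag(&go, (kmp_uint32)goal); // releasing thread
  rflag.release();
*/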
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
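/*
Cancellation sketch (illustrative): the cancellable, non-sleeping variant
returns true if the surrounding parallel region was cancelled while waiting,
letting barrier code skip its normal epilogue. "thr_bar" and "new_state"
follow the naming of the barrier code; treat this as a sketch, not a verbatim
excerpt:

  kmp_flag_64 flag(&thr_bar->b_arrived, new_state);
  if (flag.wait_cancellable_nosleep(this_thr, final_spin
                                        USE_ITT_BUILD_ARG(itt_sync_obj))) {
    return; // region cancelled; caller unwinds
  }
*/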
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker; /**< Value the flag byte is compared against to check
                         whether it has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
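/*
Byte-layout sketch (illustrative): the oncore flag packs eight one-byte flags
into a single kmp_uint64; "offset" selects the byte belonging to one thread
of the core, and byteref() is plain byte addressing into that word:

  kmp_uint64 word = 0;
  RCAST(unsigned char *, &word)[3] = 1; // what byteref(&word, 3) = 1 does
  // done_check_val(word) with offset == 3 now compares exactly that byte
  // against checker, leaving the other seven bytes to other threads.
*/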
// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H