/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and waking it. They are used to build
higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/
/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
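// The enumerators map onto the concrete classes defined later in this header:
// flag32 -> kmp_flag_32, flag64 -> kmp_flag_64, and flag_oncore ->
// kmp_flag_oncore (each class reports its enumerator from get_ptr_type()).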
/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};
/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
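// Illustrative sketch (not part of the runtime): load()/store() above form the
// usual acquire/release message-passing pair, so a waiter that observes the
// released value also observes every write the releaser made before store():
//
//   std::atomic<kmp_uint64> go(0);
//   kmp_flag<kmp_uint64> f(&go, flag64);
//   // releaser thread:        // waiter thread:
//   //   payload = 42;         //   while (f.load() != 1) { /* spin */ }
//   //   f.store(1);           //   assert(payload == 42); // visible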
#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif
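// Tool-side view (sketch; signature as specified for the OMPT interface in
// omp-tools.h): a tool that registered ompt_callback_implicit_task receives
// the scope-end dispatch above as
//
//   typedef void (*ompt_callback_implicit_task_t)(
//       ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data,
//       ompt_data_t *task_data, unsigned int actual_parallelism,
//       unsigned int index, int flags);
//
// i.e. with parallel_data == NULL, task_data == tId, and index == ds_tid.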
/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, int final_spin, bool cancellable = false,
          bool sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
/* OMPT Behavior:
THIS function is called from
  __kmp_barrier (2 times)  (implicit or explicit barrier in parallel regions)
            these have join / fork behavior

       In these cases, we don't change the state or trigger events in THIS
function.
       Events are triggered in the calling code (__kmp_barrier):

                state := ompt_state_overhead
            barrier-begin
            barrier-wait-begin
                state := ompt_state_wait_barrier
          call join-barrier-implementation (finally arrive here)
          {}
          call fork-barrier-implementation (finally arrive here)
          {}
                state := ompt_state_overhead
            barrier-wait-end
            barrier-end
                state := ompt_state_work_parallel

  __kmp_fork_barrier (after thread creation, before executing implicit task)
          call fork-barrier-implementation (finally arrive here)
          {} // worker arrives here with state = ompt_state_idle

  __kmp_join_barrier (implicit barrier at end of parallel region)
                state := ompt_state_barrier_implicit
            barrier-begin
            barrier-wait-begin
          call join-barrier-implementation (finally arrive here
final_spin=FALSE)
          {}
  __kmp_fork_barrier (implicit barrier at end of parallel region)
          call fork-barrier-implementation (finally arrive here
final_spin=TRUE)

       Worker after task-team is finished:
            barrier-wait-end
            barrier-end
            implicit-task-end
            idle-begin
                state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
            idle-end
                state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif
  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();
  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if
    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }
#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}
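/* Structure of the wait loop above, reduced to a sketch for orientation (the
   real control flow, including the cancellation and OMPT paths, is the code
   above):

     while (flag->notdone_check()) {
       flag->execute_tasks(...);           // 1) do useful work while waiting
       KMP_YIELD_OVERSUB_ELSE_SPIN(spins); // 2) spin with pause/yield
       if (blocktime expired)
         flag->suspend(th_gtid);           // 3) sleep until resume()d
     }

   A suspended thread is only woken by __kmp_release_template below, which is
   why every waiter must have a matching releaser. */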
/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
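// Typical wait/release pairing (illustrative sketch modeled on the barrier
// code in kmp_barrier.cpp; thr_bar is a hypothetical alias for a worker's
// th_bar[...].bb state):
//
//   // Worker: wait until the master bumps b_go by KMP_BARRIER_STATE_BUMP.
//   kmp_flag_64 flag(&thr_bar->b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
//
//   // Master: bump the worker's b_go and wake it if its sleep bit is set.
//   kmp_flag_64 flag(&waiter_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
//                    waiter_thr);
//   flag.release();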
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
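// Usage sketch (illustrative): the flag classes below never call the
// KMP_TEST_THEN_* macros directly; routing them through flag_traits lets one
// template body serve both widths:
//
//   kmp_uint32 f32 = 0;
//   flag_traits<kmp_uint32>::test_then_add4(&f32); // KMP_TEST_THEN_ADD4_32
//   kmp_uint64 f64 = 0;
//   flag_traits<kmp_uint64>::test_then_or(&f64, KMP_BARRIER_SLEEP_STATE);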
// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check if flag has
                       been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
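// Worked example of the release arithmetic (KMP_BARRIER_STATE_BUMP is 4, i.e.
// 1 << KMP_BARRIER_BUMP_BIT, per kmp.h): a waiter constructed with
// checker == old value + 4 spins until internal_release() bumps the flag.
//
//   volatile kmp_uint64 go = 0;
//   kmp_basic_flag_native<kmp_uint64> w(&go, /*checker=*/4);
//   w.notdone_check();    // true:  go (0) != checker (4) -> keep waiting
//   w.internal_release(); // test_then_add4: go becomes 4
//   w.done_check();       // true:  go (4) == checker (4) -> released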
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check if flag has
                       been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
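// Encoding note (sketch): releases bump the flag by 4 (KMP_BARRIER_STATE_BUMP),
// leaving the low bits free for status; KMP_BARRIER_SLEEP_STATE lives there.
// A waiter advertises that it is about to suspend, and a waker atomically
// learns whether anyone was asleep:
//
//   kmp_uint64 old_val = flag.set_sleeping(); // waiter, before suspending
//   kmp_uint64 prev = flag.unset_sleeping();  // waker
//   if (flag.is_sleeping_val(prev)) { /* a waiter was asleep: resume() it */ }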
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
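// The cancellable variant (sketch): barrier code uses it when cancellation is
// enabled so a worker can leave the wait early; a true return means the
// enclosing parallel region was cancelled rather than the flag released.
// thr_bar and new_state below are hypothetical stand-ins for caller state.
//
//   kmp_flag_64 flag(&thr_bar->b_arrived, new_state);
//   bool cancelled = flag.wait_cancellable_nosleep(
//       this_thr, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
//   if (cancelled) { /* unwind to the cancellation point */ }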
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
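// Layout note (sketch): the on-core flag packs eight one-byte sub-flags into a
// single 64-bit word, one byte per thread sharing the core; byteref(get(),
// offset) selects this thread's byte. With infinite blocktime each thread may
// store its byte directly; otherwise release ORs in a one-byte mask, e.g.
// offset == 2 contributes mask 0x0000000000010000 on a little-endian target,
// so concurrent releases of different bytes cannot clobber each other.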
// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H