]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm-project/libcxx/include/barrier
Merge llvm-project main llvmorg-14-init-10186-gff7f2cfa959b
[FreeBSD/FreeBSD.git] / contrib / llvm-project / libcxx / include / barrier
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
12
13 /*
14     barrier synopsis
15
16 namespace std
17 {
18
19   template<class CompletionFunction = see below>
20   class barrier
21   {
22   public:
23     using arrival_token = see below;
24
25     static constexpr ptrdiff_t max() noexcept;
26
27     constexpr explicit barrier(ptrdiff_t phase_count,
28                                CompletionFunction f = CompletionFunction());
29     ~barrier();
30
31     barrier(const barrier&) = delete;
32     barrier& operator=(const barrier&) = delete;
33
34     [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35     void wait(arrival_token&& arrival) const;
36
37     void arrive_and_wait();
38     void arrive_and_drop();
39
40   private:
41     CompletionFunction completion; // exposition only
42   };
43
44 }
45
46 */
47
48 #include <__availability>
49 #include <__config>
50 #include <atomic>
51 #ifndef _LIBCPP_HAS_NO_TREE_BARRIER
52 # include <memory>
53 #endif
54
55 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
56 #pragma GCC system_header
57 #endif
58
59 #ifdef _LIBCPP_HAS_NO_THREADS
60 # error <barrier> is not supported on this single threaded system
61 #endif
62
63 _LIBCPP_PUSH_MACROS
64 #include <__undef_macros>
65
66 #if _LIBCPP_STD_VER >= 14
67
68 _LIBCPP_BEGIN_NAMESPACE_STD
69
70 struct __empty_completion
71 {
72     inline _LIBCPP_INLINE_VISIBILITY
73     void operator()() noexcept
74     {
75     }
76 };
77
78 #ifndef _LIBCPP_HAS_NO_TREE_BARRIER
79
80 /*
81
82 The default implementation of __barrier_base is a classic tree barrier.
83
84 It looks different from literature pseudocode for two main reasons:
85  1. Threads that call into std::barrier functions do not provide indices,
86     so a numbering step is added before the actual barrier algorithm,
87     appearing as an N+1 round to the N rounds of the tree barrier.
88  2. A great deal of attention has been paid to avoid cache line thrashing
89     by flattening the tree structure into cache-line sized arrays, that
90     are indexed in an efficient way.
91
92 */
93
94 using __barrier_phase_t = uint8_t;
95
96 class __barrier_algorithm_base;
97
98 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
99 __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
100
101 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
102 bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
103                                      __barrier_phase_t __old_phase);
104
105 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106 void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
107
108 template<class _CompletionF>
109 class __barrier_base {
110     ptrdiff_t                                               __expected;
111     unique_ptr<__barrier_algorithm_base,
112                void (*)(__barrier_algorithm_base*)>         __base;
113     __atomic_base<ptrdiff_t>                                __expected_adjustment;
114     _CompletionF                                            __completion;
115     __atomic_base<__barrier_phase_t>                        __phase;
116
117 public:
118     using arrival_token = __barrier_phase_t;
119
120     static constexpr ptrdiff_t max() noexcept {
121         return numeric_limits<ptrdiff_t>::max();
122     }
123
124     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
125     __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
126             : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
127                                              &__destroy_barrier_algorithm_base),
128               __expected_adjustment(0), __completion(move(__completion)), __phase(0)
129     {
130     }
131     [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132     arrival_token arrive(ptrdiff_t update)
133     {
134         auto const __old_phase = __phase.load(memory_order_relaxed);
135         for(; update; --update)
136             if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
137                 __completion();
138                 __expected += __expected_adjustment.load(memory_order_relaxed);
139                 __expected_adjustment.store(0, memory_order_relaxed);
140                 __phase.store(__old_phase + 2, memory_order_release);
141                 __phase.notify_all();
142             }
143         return __old_phase;
144     }
145     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
146     void wait(arrival_token&& __old_phase) const
147     {
148         auto const __test_fn = [this, __old_phase]() -> bool {
149             return __phase.load(memory_order_acquire) != __old_phase;
150         };
151         __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
152     }
153     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
154     void arrive_and_drop()
155     {
156         __expected_adjustment.fetch_sub(1, memory_order_relaxed);
157         (void)arrive(1);
158     }
159 };
160
161 #else
162
163 /*
164
165 The alternative implementation of __barrier_base is a central barrier.
166
167 Two versions of this algorithm are provided:
168  1. A fairly straightforward implementation of the litterature for the
169     general case where the completion function is not empty.
170  2. An optimized implementation that exploits 2's complement arithmetic
171     and well-defined overflow in atomic arithmetic, to handle the phase
172     roll-over for free.
173
174 */
175
176 template<class _CompletionF>
177 class __barrier_base {
178
179     __atomic_base<ptrdiff_t> __expected;
180     __atomic_base<ptrdiff_t> __arrived;
181     _CompletionF             __completion;
182     __atomic_base<bool>      __phase;
183 public:
184     using arrival_token = bool;
185
186     static constexpr ptrdiff_t max() noexcept {
187         return numeric_limits<ptrdiff_t>::max();
188     }
189
190     _LIBCPP_INLINE_VISIBILITY
191     __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
192         : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
193     {
194     }
195     [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
196     arrival_token arrive(ptrdiff_t update)
197     {
198         auto const __old_phase = __phase.load(memory_order_relaxed);
199         auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
200         auto const new_expected = __expected.load(memory_order_relaxed);
201         if(0 == __result) {
202             __completion();
203             __arrived.store(new_expected, memory_order_relaxed);
204             __phase.store(!__old_phase, memory_order_release);
205             __phase.notify_all();
206         }
207         return __old_phase;
208     }
209     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
210     void wait(arrival_token&& __old_phase) const
211     {
212         __phase.wait(__old_phase, memory_order_acquire);
213     }
214     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
215     void arrive_and_drop()
216     {
217         __expected.fetch_sub(1, memory_order_relaxed);
218         (void)arrive(1);
219     }
220 };
221
222 template<>
223 class __barrier_base<__empty_completion> {
224
225     static constexpr uint64_t __expected_unit = 1ull;
226     static constexpr uint64_t __arrived_unit = 1ull << 32;
227     static constexpr uint64_t __expected_mask = __arrived_unit - 1;
228     static constexpr uint64_t __phase_bit = 1ull << 63;
229     static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
230
231     __atomic_base<uint64_t>   __phase_arrived_expected;
232
233     static _LIBCPP_INLINE_VISIBILITY
234     constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
235     {
236         return ((uint64_t(1u << 31) - __count) << 32)
237               | (uint64_t(1u << 31) - __count);
238     }
239
240 public:
241     using arrival_token = uint64_t;
242
243     static constexpr ptrdiff_t max() noexcept {
244         return ptrdiff_t(1u << 31) - 1;
245     }
246
247     _LIBCPP_INLINE_VISIBILITY
248     explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
249         : __phase_arrived_expected(__init(__count))
250     {
251     }
252     [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
253     arrival_token arrive(ptrdiff_t update)
254     {
255         auto const __inc = __arrived_unit * update;
256         auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
257         if((__old ^ (__old + __inc)) & __phase_bit) {
258             __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
259             __phase_arrived_expected.notify_all();
260         }
261         return __old & __phase_bit;
262     }
263     inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
264     void wait(arrival_token&& __phase) const
265     {
266         auto const __test_fn = [=]() -> bool {
267             uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
268             return ((__current & __phase_bit) != __phase);
269         };
270         __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
271     }
272     inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
273     void arrive_and_drop()
274     {
275         __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
276         (void)arrive(1);
277     }
278 };
279
280 #endif //_LIBCPP_HAS_NO_TREE_BARRIER
281
282 template<class _CompletionF = __empty_completion>
283 class barrier {
284
285     __barrier_base<_CompletionF> __b;
286 public:
287     using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
288
289     static constexpr ptrdiff_t max() noexcept {
290         return __barrier_base<_CompletionF>::max();
291     }
292
293     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
294     barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
295         : __b(__count, _VSTD::move(__completion)) {
296     }
297
298     barrier(barrier const&) = delete;
299     barrier& operator=(barrier const&) = delete;
300
301     [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
302     arrival_token arrive(ptrdiff_t update = 1)
303     {
304         return __b.arrive(update);
305     }
306     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
307     void wait(arrival_token&& __phase) const
308     {
309         __b.wait(_VSTD::move(__phase));
310     }
311     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312     void arrive_and_wait()
313     {
314         wait(arrive());
315     }
316     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
317     void arrive_and_drop()
318     {
319         __b.arrive_and_drop();
320     }
321 };
322
323 _LIBCPP_END_NAMESPACE_STD
324
325 #endif // _LIBCPP_STD_VER >= 14
326
327 _LIBCPP_POP_MACROS
328
329 #endif //_LIBCPP_BARRIER