1 //===- lld/Core/Parallel.h - Parallel utilities ---------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #ifndef LLD_CORE_PARALLEL_H
11 #define LLD_CORE_PARALLEL_H
13 #include "lld/Core/Instrumentation.h"
14 #include "lld/Core/LLVM.h"
15 #include "llvm/Support/MathExtras.h"
16 #include "llvm/Support/thread.h"
20 #include <condition_variable>
24 #if defined(_MSC_VER) && LLVM_ENABLE_THREADS
30 /// \brief Allows one or more threads to wait on a potentially unknown number of
33 /// A latch starts at \p count. inc() increments this, and dec() decrements it.
34 /// All calls to sync() will block while the count is not 0.
36 /// Calling dec() on a Latch with a count of 0 has undefined behavior.
// NOTE(review): this extract is missing interleaved lines of the Latch class
// (the `class Latch {` header, the _count member declaration, and parts of the
// inc()/dec()/sync() bodies are not visible). Comments below describe only
// what the visible lines establish — confirm against the full header.
39 mutable std::mutex _condMut;               // guards the counter; mutable so const members (sync) can lock
40 mutable std::condition_variable _cond;     // notified when the counter may have reached 0
// Start the latch at `count` (default 0); explicit to forbid implicit int->Latch.
43 explicit Latch(uint32_t count = 0) : _count(count) {}
// Presumably the start of inc(): lock before mutating the shared counter — TODO confirm.
47 std::unique_lock<std::mutex> lock(_condMut);
// Presumably the start of dec(): same locking discipline — TODO confirm.
52 std::unique_lock<std::mutex> lock(_condMut);
// sync(): the unique_lock is required by the condition-variable wait below.
58 std::unique_lock<std::mutex> lock(_condMut);
59 _cond.wait(lock, [&] {                     // block until the predicate (count is 0, per the class comment) holds
65 // Classes in this namespace are implementation details of this header.
68 /// \brief An abstract class that takes closures and runs them asynchronously.
// NOTE(review): the `class Executor {` header line is missing from this extract.
71 virtual ~Executor() = default;                    // virtual dtor: instances are deleted through Executor*
72 virtual void add(std::function<void()> func) = 0; // enqueue func for (possibly asynchronous) execution
75 #if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// No-threads fallback: an Executor for builds without LLVM_ENABLE_THREADS.
76 class SyncExecutor : public Executor {
// NOTE(review): the body is missing from this extract; add() presumably just
// invokes func() synchronously — confirm against the full header.
78 virtual void add(std::function<void()> func) {
// Process-wide default executor, lazily constructed on first use.
83 inline Executor *getDefaultExecutor() {
84 static SyncExecutor exec;   // function-local static: thread-safe one-time init
87 #elif defined(_MSC_VER)
88 /// \brief An Executor that runs tasks via ConcRT.
89 class ConcRTExecutor : public Executor {
// Adapter that carries a std::function through ConcRT's C-style
// void(void *) task callback.
91 Taskish(std::function<void()> task) : _task(task) {}
93 std::function<void()> _task;
// Trampoline handed to the scheduler; receives the Taskish allocated in add().
95 static void run(void *p) {
96 Taskish *self = static_cast<Taskish *>(p);
// NOTE(review): the line invoking self->_task is not visible in this extract.
// The storage came from concurrency::Alloc (see add() below), so it is
// released with the matching concurrency::Free.
98 concurrency::Free(self);
103 virtual void add(std::function<void()> func) {
// Placement-new the Taskish into ConcRT-allocated storage and hand it to the
// scheduler together with the run() trampoline above.
104 Concurrency::CurrentScheduler::ScheduleTask(Taskish::run,
105 new (concurrency::Alloc(sizeof(Taskish))) Taskish(func));
// Process-wide default executor, lazily constructed on first use.
109 inline Executor *getDefaultExecutor() {
110 static ConcRTExecutor exec;
114 /// \brief An implementation of an Executor that runs closures on a thread pool
// NOTE(review): many interleaved lines of this class (the work() loop header,
// loop bodies, several members) are missing from this extract; comments are
// limited to what the visible lines show.
116 class ThreadPoolExecutor : public Executor {
// Default pool size: one worker per hardware thread.
118 explicit ThreadPoolExecutor(unsigned threadCount =
119 std::thread::hardware_concurrency())
120 : _stop(false), _done(threadCount) {
121 // Spawn all but one of the threads in another thread as spawning threads
// can take a while. (NOTE(review): sentence completed; the original comment
// was cut off in this extract.)
123 std::thread([&, threadCount] {
// Loop runs threadCount - 1 times — "all but one", per the comment above.
124 for (size_t i = 1; i < threadCount; ++i) {
133 ~ThreadPoolExecutor() override {
// Dtor takes the lock — presumably to set _stop and wake the workers; the
// following lines are not visible here.
134 std::unique_lock<std::mutex> lock(_mutex);
141 void add(std::function<void()> f) override {
142 std::unique_lock<std::mutex> lock(_mutex);   // protect _workStack while enqueueing
// Worker loop body (the enclosing function's header is not visible):
151 std::unique_lock<std::mutex> lock(_mutex);
152 _cond.wait(lock, [&] {                       // sleep until shutdown or work arrives
153 return _stop || !_workStack.empty();
157 auto task = _workStack.top();                // LIFO: take the most recently pushed task
165 std::atomic<bool> _stop;                     // set once to tell workers to exit
166 std::stack<std::function<void()>> _workStack; // pending tasks (LIFO order)
168 std::condition_variable _cond;               // paired with _mutex (whose declaration is not visible here)
// Process-wide default executor, lazily constructed on first use.
172 inline Executor *getDefaultExecutor() {
173 static ThreadPoolExecutor exec;
178 } // namespace internal
180 /// \brief Allows launching a number of tasks and waiting for them to finish
181 /// either explicitly via sync() or implicitly on destruction.
// NOTE(review): the `class TaskGroup {` header and the _latch member
// declaration are missing from this extract.
// Schedule f on the default executor; per the class comment, completion is
// tracked so sync() can wait for it.
186 void spawn(std::function<void()> f) {
// f is captured by copy so the closure owns it after spawn() returns; the
// default &-capture presumably refers to this TaskGroup — confirm.
188 internal::getDefaultExecutor()->add([&, f] {
// Block until every spawned task has finished (latch count back to 0).
194 void sync() const { _latch.sync(); }
197 #if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Serial fallback: with threading disabled, parallel_sort is just std::sort.
// NOTE(review): the function signature lines (return type / name) are missing
// from this extract for both overloads below.
198 template <class RandomAccessIterator, class Comp>
200 RandomAccessIterator start, RandomAccessIterator end,
201 const Comp &comp = std::less<
202 typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
203 std::sort(start, end, comp);
205 #elif defined(_MSC_VER)
206 // Use ppl parallel_sort on Windows.
207 template <class RandomAccessIterator, class Comp>
209 RandomAccessIterator start, RandomAccessIterator end,
210 const Comp &comp = std::less<
211 typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
212 concurrency::parallel_sort(start, end, comp);
/// Ranges shorter than this are sorted sequentially by parallel_quick_sort;
/// spawning tasks for tiny sub-ranges costs more than it saves.
/// constexpr (C++11, already used by this file's feature level) makes it a
/// true compile-time constant; also drops the stray "216 " extraction junk
/// that prefixed the original line.
constexpr ptrdiff_t minParallelSize = 1024;
218 /// \brief Inclusive median.
// Returns an iterator to the median of *start, *mid and *(end - 1) under
// comp — used as the quicksort pivot below.
219 template <class RandomAccessIterator, class Comp>
220 RandomAccessIterator medianOf3(RandomAccessIterator start,
221 RandomAccessIterator end, const Comp &comp) {
222 RandomAccessIterator mid = start + (std::distance(start, end) / 2);
// Branch on the pairwise ordering of the three candidates; each leaf of the
// nested ternary yields the middle element.
// NOTE(review): the remaining ternary arms and the closing brace (original
// lines 225, 227+) are missing from this extract.
223 return comp(*start, *(end - 1))
224 ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? mid : start)
226 : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1)
// Recursive parallel quicksort: partitions around a median-of-3 pivot and
// spawns one half as a task on `tg`; `depth` bounds the recursion before
// degrading to sequential std::sort.
230 template <class RandomAccessIterator, class Comp>
231 void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end,
232 const Comp &comp, TaskGroup &tg, size_t depth) {
233 // Do a sequential sort for small inputs.
234 if (std::distance(start, end) < detail::minParallelSize || depth == 0) {
235 std::sort(start, end, comp);
// NOTE(review): the early `return` after the sequential sort (and the
// closing brace) are not visible in this extract — confirm.
240 auto pivot = medianOf3(start, end, comp);
241 // Move pivot to end.
242 std::swap(*(end - 1), *pivot);
// Partition everything ordered before the pivot to the left; the pivot value
// itself is parked at end - 1 during the scan, hence the end - 1 range.
243 pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) {
244 return comp(v, *(end - 1));
246 // Move pivot to middle of partition.
247 std::swap(*pivot, *(end - 1));
// Recurse on both halves; the left half runs as a spawned task. Iterators
// are captured by value, comp and tg by reference.
250 tg.spawn([=, &comp, &tg] {
251 parallel_quick_sort(start, pivot, comp, tg, depth - 1);
253 parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1);
// Thread-pool implementation of parallel_sort: delegates to
// detail::parallel_quick_sort with a recursion-depth cap of log2(n) + 1.
// NOTE(review): the signature line, the TaskGroup declaration (`tg`), and the
// closing braces are missing from this extract.
257 template <class RandomAccessIterator, class Comp>
259 RandomAccessIterator start, RandomAccessIterator end,
260 const Comp &comp = std::less<
261 typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
263 detail::parallel_quick_sort(start, end, comp, tg,
264 llvm::Log2_64(std::distance(start, end)) + 1);
// Convenience overload for raw pointer ranges using std::less<T>.
268 template <class T> void parallel_sort(T *start, T *end) {
269 parallel_sort(start, end, std::less<T>());
272 #if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Serial fallbacks when threading is disabled: run Fn over the range inline.
273 template <class IterTy, class FuncTy>
274 void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
275 std::for_each(Begin, End, Fn);
// Serial parallel_for: invoke Fn on each index in [Begin, End).
278 template <class IndexTy, class FuncTy>
279 void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
280 for (IndexTy I = Begin; I != End; ++I)
283 #elif defined(_MSC_VER)
284 // Use ppl parallel_for_each on Windows.
285 template <class IterTy, class FuncTy>
286 void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
287 concurrency::parallel_for_each(Begin, End, Fn);
// PPL also provides the index-range variant directly.
290 template <class IndexTy, class FuncTy>
291 void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
292 concurrency::parallel_for(Begin, End, Fn);
295 template <class IterTy, class FuncTy>
296 void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
297 // TaskGroup has a relatively high overhead, so we want to reduce
298 // the number of spawn() calls. We'll create up to 1024 tasks here.
299 // (Note that 1024 is an arbitrary number. This code probably needs
300 // improving to take the number of available cores into account.)
301 ptrdiff_t TaskSize = std::distance(Begin, End) / 1024;
// NOTE(review): the lines clamping TaskSize to at least 1 and declaring the
// TaskGroup (`Tg`) are missing from this extract.
// Spawn one task per TaskSize-sized chunk.
306 while (TaskSize <= std::distance(Begin, End)) {
307 Tg.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); });
// NOTE(review): the advance of Begin inside the loop is not visible here.
// Final task covers the remaining tail shorter than TaskSize.
310 Tg.spawn([=, &Fn] { std::for_each(Begin, End, Fn); });
// Same chunking strategy over an integer index range.
313 template <class IndexTy, class FuncTy>
314 void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
315 ptrdiff_t TaskSize = (End - Begin) / 1024;
// NOTE(review): the TaskSize clamp, TaskGroup declaration, and the
// declaration of the loop cursor `I` are missing from this extract.
321 for (; I < End; I += TaskSize) {
// Each spawned task applies Fn to one chunk of indices.
323 for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
// Tail loop: remaining indices past the last full chunk.
329 for (IndexTy J = I; J < End; ++J)
334 } // end namespace lld
336 #endif // LLD_CORE_PARALLEL_H