include/lld/Core/Parallel.h

   1 //===- lld/Core/Parallel.h - Parallel utilities ---------------------------===//
   2 //
   3 //                             The LLVM Linker
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef LLD_CORE_PARALLEL_H
  11 #define LLD_CORE_PARALLEL_H
  12
  13 #include "lld/Core/Instrumentation.h"
  14 #include "lld/Core/LLVM.h"
  15 #include "lld/Core/range.h"
  16 #include "llvm/Support/MathExtras.h"
  17
  18 #ifdef _MSC_VER
  19 // concrt.h depends on eh.h for __uncaught_exception declaration
  20 // even if we disable exceptions.
  21 #include <eh.h>
  22 #endif
  23
  24 #include <algorithm>
  25 #include <atomic>
  26 #include <condition_variable>
  27 #include <mutex>
  28 #include <thread>
  29 #include <stack>
  30
  31 #ifdef _MSC_VER
  32 #include <concrt.h>
  33 #include <ppl.h>
  34 #endif
  35
  36 namespace lld {
  37 /// \brief Allows one or more threads to wait on a potentially unknown number of
  38 ///   events.
  39 ///
  40 /// A latch starts at \p count. inc() increments this, and dec() decrements it.
  41 /// All calls to sync() will block while the count is not 0.
  42 ///
  43 /// Calling dec() on a Latch with a count of 0 has undefined behaivor.
  44 class Latch {
  45   uint32_t _count;
  46   mutable std::mutex _condMut;
  47   mutable std::condition_variable _cond;
  48
  49 public:
  50   explicit Latch(uint32_t count = 0) : _count(count) {}
  51   ~Latch() { sync(); }
  52
  53   void inc() {
  54     std::unique_lock<std::mutex> lock(_condMut);
  55     ++_count;
  56   }
  57
  58   void dec() {
  59     std::unique_lock<std::mutex> lock(_condMut);
  60     if (--_count == 0)
  61       _cond.notify_all();
  62   }
  63
  64   void sync() const {
  65     std::unique_lock<std::mutex> lock(_condMut);
  66     _cond.wait(lock, [&] {
  67       return _count == 0;
  68     });
  69   }
  70 };
  71
  72 /// \brief An implementation of future. std::future and std::promise in
  73 /// old libstdc++ have a threading bug; there is a small chance that a
  74 /// call of future::get throws an exception in the normal use case.
  75 /// We want to use our own future implementation until we drop support
  76 /// of old versions of libstdc++.
  77 /// https://gcc.gnu.org/ml/gcc-patches/2014-05/msg01389.html
  78 template<typename T> class Future {
  79 public:
  80   Future() : _hasValue(false) {}
  81
  82   void set(T &&val) {
  83     assert(!_hasValue);
  84     {
  85       std::unique_lock<std::mutex> lock(_mutex);
  86       _val = val;
  87       _hasValue = true;
  88     }
  89     _cond.notify_all();
  90   }
  91
  92   T &get() {
  93     std::unique_lock<std::mutex> lock(_mutex);
  94     if (_hasValue)
  95       return _val;
  96     _cond.wait(lock, [&] { return _hasValue; });
  97     return _val;
  98   }
  99
 100 private:
 101   T _val;
 102   bool _hasValue;
 103   std::mutex _mutex;
 104   std::condition_variable _cond;
 105 };
 106
 107 /// \brief An abstract class that takes closures and runs them asynchronously.
 108 class Executor {
 109 public:
 110   virtual ~Executor() {}
 111   virtual void add(std::function<void()> func) = 0;
 112 };
 113
 114 /// \brief An implementation of an Executor that runs closures on a thread pool
 115 ///   in filo order.
 116 class ThreadPoolExecutor : public Executor {
 117 public:
 118   explicit ThreadPoolExecutor(unsigned threadCount =
 119                                   std::thread::hardware_concurrency())
 120       : _stop(false), _done(threadCount) {
 121     // Spawn all but one of the threads in another thread as spawning threads
 122     // can take a while.
 123     std::thread([&, threadCount] {
 124       for (std::size_t i = 1; i < threadCount; ++i) {
 125         std::thread([=] {
 126           work();
 127         }).detach();
 128       }
 129       work();
 130     }).detach();
 131   }
 132
 133   ~ThreadPoolExecutor() {
 134     std::unique_lock<std::mutex> lock(_mutex);
 135     _stop = true;
 136     lock.unlock();
 137     _cond.notify_all();
 138     // Wait for ~Latch.
 139   }
 140
 141   void add(std::function<void()> f) override {
 142     std::unique_lock<std::mutex> lock(_mutex);
 143     _workStack.push(f);
 144     lock.unlock();
 145     _cond.notify_one();
 146   }
 147
 148 private:
 149   void work() {
 150     while (true) {
 151       std::unique_lock<std::mutex> lock(_mutex);
 152       _cond.wait(lock, [&] {
 153         return _stop || !_workStack.empty();
 154       });
 155       if (_stop)
 156         break;
 157       auto task = _workStack.top();
 158       _workStack.pop();
 159       lock.unlock();
 160       task();
 161     }
 162     _done.dec();
 163   }
 164
 165   std::atomic<bool> _stop;
 166   std::stack<std::function<void()>> _workStack;
 167   std::mutex _mutex;
 168   std::condition_variable _cond;
 169   Latch _done;
 170 };
 171
 172 #ifdef _MSC_VER
 173 /// \brief An Executor that runs tasks via ConcRT.
 174 class ConcRTExecutor : public Executor {
 175   struct Taskish {
 176     Taskish(std::function<void()> task) : _task(task) {}
 177
 178     std::function<void()> _task;
 179
 180     static void run(void *p) {
 181       Taskish *self = static_cast<Taskish *>(p);
 182       self->_task();
 183       concurrency::Free(self);
 184     }
 185   };
 186
 187 public:
 188   virtual void add(std::function<void()> func) {
 189     Concurrency::CurrentScheduler::ScheduleTask(Taskish::run,
 190         new (concurrency::Alloc(sizeof(Taskish))) Taskish(func));
 191   }
 192 };
 193
 194 inline Executor *getDefaultExecutor() {
 195   static ConcRTExecutor exec;
 196   return &exec;
 197 }
 198 #else
 199 inline Executor *getDefaultExecutor() {
 200   static ThreadPoolExecutor exec;
 201   return &exec;
 202 }
 203 #endif
 204
 205 /// \brief Allows launching a number of tasks and waiting for them to finish
 206 ///   either explicitly via sync() or implicitly on destruction.
 207 class TaskGroup {
 208   Latch _latch;
 209
 210 public:
 211   void spawn(std::function<void()> f) {
 212     _latch.inc();
 213     getDefaultExecutor()->add([&, f] {
 214       f();
 215       _latch.dec();
 216     });
 217   }
 218
 219   void sync() const { _latch.sync(); }
 220 };
 221
 222 #ifdef _MSC_VER
 223 // Use ppl parallel_sort on Windows.
 224 template <class RandomAccessIterator, class Comp>
 225 void parallel_sort(
 226     RandomAccessIterator start, RandomAccessIterator end,
 227     const Comp &comp = std::less<
 228         typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
 229   concurrency::parallel_sort(start, end, comp);
 230 }
 231 #else
 232 namespace detail {
 233 const ptrdiff_t minParallelSize = 1024;
 234
 235 /// \brief Inclusive median.
 236 template <class RandomAccessIterator, class Comp>
 237 RandomAccessIterator medianOf3(RandomAccessIterator start,
 238                                RandomAccessIterator end, const Comp &comp) {
 239   RandomAccessIterator mid = start + (std::distance(start, end) / 2);
 240   return comp(*start, *(end - 1))
 241          ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? mid : start)
 242                                    : end - 1)
 243          : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1)
 244                                : start);
 245 }
 246
 247 template <class RandomAccessIterator, class Comp>
 248 void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end,
 249                          const Comp &comp, TaskGroup &tg, size_t depth) {
 250   // Do a sequential sort for small inputs.
 251   if (std::distance(start, end) < detail::minParallelSize || depth == 0) {
 252     std::sort(start, end, comp);
 253     return;
 254   }
 255
 256   // Partition.
 257   auto pivot = medianOf3(start, end, comp);
 258   // Move pivot to end.
 259   std::swap(*(end - 1), *pivot);
 260   pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) {
 261     return comp(v, *(end - 1));
 262   });
 263   // Move pivot to middle of partition.
 264   std::swap(*pivot, *(end - 1));
 265
 266   // Recurse.
 267   tg.spawn([=, &comp, &tg] {
 268     parallel_quick_sort(start, pivot, comp, tg, depth - 1);
 269   });
 270   parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1);
 271 }
 272 }
 273
 274 template <class RandomAccessIterator, class Comp>
 275 void parallel_sort(
 276     RandomAccessIterator start, RandomAccessIterator end,
 277     const Comp &comp = std::less<
 278         typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
 279   TaskGroup tg;
 280   detail::parallel_quick_sort(start, end, comp, tg,
 281                               llvm::Log2_64(std::distance(start, end)) + 1);
 282 }
 283 #endif
 284
 285 template <class T> void parallel_sort(T *start, T *end) {
 286   parallel_sort(start, end, std::less<T>());
 287 }
 288
 289 #ifdef _MSC_VER
 290 // Use ppl parallel_for_each on Windows.
 291 template <class Iterator, class Func>
 292 void parallel_for_each(Iterator begin, Iterator end, Func func) {
 293   concurrency::parallel_for_each(begin, end, func);
 294 }
 295 #else
 296 template <class Iterator, class Func>
 297 void parallel_for_each(Iterator begin, Iterator end, Func func) {
 298   TaskGroup tg;
 299   ptrdiff_t taskSize = 1024;
 300   while (taskSize <= std::distance(begin, end)) {
 301     tg.spawn([=, &func] { std::for_each(begin, begin + taskSize, func); });
 302     begin += taskSize;
 303   }
 304   std::for_each(begin, end, func);
 305 }
 306 #endif
 307 } // end namespace lld
 308
 309 #endif