contrib/llvm-project/llvm/include/llvm/Support/Threading.h

   1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file declares helper functions for running LLVM in a multi-threaded
  10 // environment.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_SUPPORT_THREADING_H
  15 #define LLVM_SUPPORT_THREADING_H
  16
  17 #include "llvm/ADT/BitVector.h"
  18 #include "llvm/ADT/FunctionExtras.h"
  19 #include "llvm/ADT/SmallVector.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
  22 #include "llvm/Support/Compiler.h"
  23 #include <ciso646> // So we can check the C++ standard lib macros.
  24 #include <functional>
  25
  26 #if defined(_MSC_VER)
  27 // MSVC's call_once implementation worked since VS 2015, which is the minimum
  28 // supported version as of this writing.
  29 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
  30 #elif defined(LLVM_ON_UNIX) &&                                                 \
  31     (defined(_LIBCPP_VERSION) ||                                               \
  32      !(defined(__NetBSD__) || defined(__OpenBSD__) ||                          \
  33        (defined(__ppc__) || defined(__PPC__))))
  34 // std::call_once from libc++ is used on all Unix platforms. Other
  35 // implementations like libstdc++ are known to have problems on NetBSD,
  36 // OpenBSD and PowerPC.
  37 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
  38 #elif defined(LLVM_ON_UNIX) &&                                                 \
  39     ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
  40 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
  41 #else
  42 #define LLVM_THREADING_USE_STD_CALL_ONCE 0
  43 #endif
  44
  45 #if LLVM_THREADING_USE_STD_CALL_ONCE
  46 #include <mutex>
  47 #else
  48 #include "llvm/Support/Atomic.h"
  49 #endif
  50
  51 namespace llvm {
  52 class Twine;
  53
/// Reports whether this build of LLVM supports multi-threading.
///
/// \returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
bool llvm_is_multithreaded();
  57
  58 #if LLVM_THREADING_USE_STD_CALL_ONCE
  59
  60   typedef std::once_flag once_flag;
  61
  62 #else
  63
  64   enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
  65
  66   /// The llvm::once_flag structure
  67   ///
  68   /// This type is modeled after std::once_flag to use with llvm::call_once.
  69   /// This structure must be used as an opaque object. It is a struct to force
  70   /// autoinitialization and behave like std::once_flag.
  71   struct once_flag {
  72     volatile sys::cas_flag status = Uninitialized;
  73   };
  74
  75 #endif
  76
  77   /// Execute the function specified as a parameter once.
  78   ///
  79   /// Typical usage:
  80   /// \code
  81   ///   void foo() {...};
  82   ///   ...
  83   ///   static once_flag flag;
  84   ///   call_once(flag, foo);
  85   /// \endcode
  86   ///
  87   /// \param flag Flag used for tracking whether or not this has run.
  88   /// \param F Function to call once.
  89   template <typename Function, typename... Args>
  90   void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
  91 #if LLVM_THREADING_USE_STD_CALL_ONCE
  92     std::call_once(flag, std::forward<Function>(F),
  93                    std::forward<Args>(ArgList)...);
  94 #else
  95     // For other platforms we use a generic (if brittle) version based on our
  96     // atomics.
  97     sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
  98     if (old_val == Uninitialized) {
  99       std::forward<Function>(F)(std::forward<Args>(ArgList)...);
 100       sys::MemoryFence();
 101       TsanIgnoreWritesBegin();
 102       TsanHappensBefore(&flag.status);
 103       flag.status = Done;
 104       TsanIgnoreWritesEnd();
 105     } else {
 106       // Wait until any thread doing the call has finished.
 107       sys::cas_flag tmp = flag.status;
 108       sys::MemoryFence();
 109       while (tmp != Done) {
 110         tmp = flag.status;
 111         sys::MemoryFence();
 112       }
 113     }
 114     TsanHappensAfter(&flag.status);
 115 #endif
 116   }
 117
  /// Describes how a thread pool will be used: how many threads are
  /// requested, whether hyper-threads may be used, and whether the request is
  /// capped by the hardware.
  class ThreadPoolStrategy {
  public:
    /// The default value (0) means all available threads should be used,
    /// taking the affinity mask into account. If set, this value only
    /// represents a suggested upper bound; the runtime might choose a lower
    /// value (never a higher one).
    unsigned ThreadsRequested = 0;

    /// If SMT is active, use hyper threads. If false, there will be only one
    /// std::thread per core.
    bool UseHyperThreads = true;

    /// If set, will constrain 'ThreadsRequested' to the number of hardware
    /// threads, or hardware cores.
    bool Limit = false;

    /// Retrieves the max available threads for the current strategy. This
    /// accounts for affinity masks and takes advantage of all CPU sockets.
    unsigned compute_thread_count() const;

    /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
    /// multi-socket system, this ensures threads are assigned to all CPU
    /// sockets.
    ///
    /// \param ThreadPoolNum A number in the half-open range
    /// [0, compute_thread_count()).
    void apply_thread_strategy(unsigned ThreadPoolNum) const;

    /// Finds the CPU socket where a thread should go.
    ///
    /// \returns 'None' if the thread shall remain on the actual CPU socket.
    Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
  };
 149
  /// Build a strategy from a number of threads as a string provided in \p Num.
  /// When Num is above the max number of threads specified by the \p Default
  /// strategy, we attempt to equally allocate the threads on all CPU sockets.
  ///  - "0" or an empty string will return the \p Default strategy.
  ///  - "all" requests all hardware threads.
  ///
  /// NOTE(review): the StringRef overload of heavyweight_hardware_concurrency()
  /// treats an empty result as "Num is invalid"; presumably that is the only
  /// case in which no strategy is returned -- confirm against the definition.
  Optional<ThreadPoolStrategy>
  get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});
 157
 158   /// Returns a thread strategy for tasks requiring significant memory or other
 159   /// resources. To be used for workloads where hardware_concurrency() proves to
 160   /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
 161   /// based on physical cores, if available for the host system, otherwise falls
 162   /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
 163   /// LLVM_ENABLE_THREADS = OFF.
 164   inline ThreadPoolStrategy
 165   heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
 166     ThreadPoolStrategy S;
 167     S.UseHyperThreads = false;
 168     S.ThreadsRequested = ThreadCount;
 169     return S;
 170   }
 171
 172   /// Like heavyweight_hardware_concurrency() above, but builds a strategy
 173   /// based on the rules described for get_threadpool_strategy().
 174   /// If \p Num is invalid, returns a default strategy where one thread per
 175   /// hardware core is used.
 176   inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
 177     Optional<ThreadPoolStrategy> S =
 178         get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
 179     if (S)
 180       return *S;
 181     return heavyweight_hardware_concurrency();
 182   }
 183
 184   /// Returns a default thread strategy where all available hardware resources
 185   /// are to be used, except for those initially excluded by an affinity mask.
 186   /// This function takes affinity into consideration. Returns 1 when LLVM is
 187   /// configured with LLVM_ENABLE_THREADS=OFF.
 188   inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
 189     ThreadPoolStrategy S;
 190     S.ThreadsRequested = ThreadCount;
 191     return S;
 192   }
 193
 194   /// Returns an optimal thread strategy to execute specified amount of tasks.
 195   /// This strategy should prevent us from creating too many threads if we
 196   /// occasionaly have an unexpectedly small amount of tasks.
 197   inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
 198     ThreadPoolStrategy S;
 199     S.Limit = true;
 200     S.ThreadsRequested = TaskCount;
 201     return S;
 202   }
 203
  /// Return the current thread id, as used in various OS system calls.
  ///
  /// Note that not all platforms guarantee that the value returned will be
  /// unique across the entire system, so portable code should not assume
  /// this.
  uint64_t get_threadid();
 209
  /// Get the maximum length of a thread name on this platform.
  ///
  /// \returns The maximum length; a value of 0 means there is no limit.
  uint32_t get_max_thread_name_length();
 213
  /// Set the name of the current thread.  Setting a thread's name can
  /// be helpful for enabling useful diagnostics under a debugger or when
  /// logging.  The level of support for setting a thread's name varies
  /// wildly across operating systems, and we only make a best effort to
  /// perform the operation on supported platforms.  No indication of success
  /// or failure is returned.
  ///
  /// \param Name The name to give to the current thread.
  void set_thread_name(const Twine &Name);
 221
  /// Get the name of the current thread.  The level of support for
  /// getting a thread's name varies wildly across operating systems, and it
  /// is not even guaranteed that if you can successfully set a thread's name
  /// that you can later get it back.  This function is intended for diagnostic
  /// purposes, and as with setting a thread's name no indication of whether
  /// the operation succeeded or failed is returned.
  ///
  /// \param Name Output buffer; presumably receives the thread's name when it
  /// can be retrieved (contents on failure are not specified here -- confirm
  /// against the platform implementations).
  void get_thread_name(SmallVectorImpl<char> &Name);
 229
  /// Returns a mask that represents on which hardware thread, core, CPU, or
  /// NUMA group the calling thread can be executed. On Windows, threads cannot
  /// cross CPU socket boundaries.
  llvm::BitVector get_thread_affinity_mask();
 234
  /// Returns how many physical CPUs or NUMA groups the system has.
  unsigned get_cpus();
 237
 238   enum class ThreadPriority {
 239     Background = 0,
 240     Default = 1,
 241   };
 242   /// If priority is Background tries to lower current threads priority such
 243   /// that it does not affect foreground tasks significantly. Can be used for
 244   /// long-running, latency-insensitive tasks to make sure cpu is not hogged by
 245   /// this task.
 246   /// If the priority is default tries to restore current threads priority to
 247   /// default scheduling priority.
 248   enum class SetThreadPriorityResult { FAILURE, SUCCESS };
 249   SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
 250 }
 251
 252 #endif