1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file declares helper functions for running LLVM in a multi-threaded environment.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_SUPPORT_THREADING_H
15 #define LLVM_SUPPORT_THREADING_H
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/FunctionExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22 #include "llvm/Support/Compiler.h"
23 #include <ciso646> // So we can check the C++ standard lib macros.
27 // MSVC's call_once implementation has worked since VS 2015, which is the minimum
28 // supported version as of this writing.
29 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
30 #elif defined(LLVM_ON_UNIX) && \
31 (defined(_LIBCPP_VERSION) || \
32 !(defined(__NetBSD__) || defined(__OpenBSD__) || \
33 (defined(__ppc__) || defined(__PPC__))))
34 // std::call_once from libc++ is used on all Unix platforms. Other
35 // implementations like libstdc++ are known to have problems on NetBSD,
36 // OpenBSD and PowerPC.
37 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
38 #elif defined(LLVM_ON_UNIX) && \
39 ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
40 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
42 #define LLVM_THREADING_USE_STD_CALL_ONCE 0
45 #if LLVM_THREADING_USE_STD_CALL_ONCE
48 #include "llvm/Support/Atomic.h"
54 /// Returns true if LLVM is compiled with support for multi-threading, and false otherwise.
56 bool llvm_is_multithreaded();
58 #if LLVM_THREADING_USE_STD_CALL_ONCE
60 typedef std::once_flag once_flag;
/// States stored in the fallback once_flag's `status` word: `Uninitialized` is
/// its initial value, `Wait` and `Uninitialized` are the values call_once hands
/// to sys::CompareAndSwap, and `Done` is the value waiting callers loop on.
64 enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
66 /// The llvm::once_flag structure
68 /// This type is modeled after std::once_flag to use with llvm::call_once.
69 /// This structure must be used as an opaque object. It is a struct to force
70 /// autoinitialization and behave like std::once_flag.
72 volatile sys::cas_flag status = Uninitialized;
77 /// Execute the function specified as a parameter once.
83 /// static once_flag flag;
84 /// call_once(flag, foo);
87 /// \param flag Flag used for tracking whether or not this has run.
88 /// \param F Function to call once.
89 template <typename Function, typename... Args>
90 void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
91 #if LLVM_THREADING_USE_STD_CALL_ONCE
// Standard-library path: hand the flag, the callable and its arguments to
// std::call_once, perfectly forwarded.
92 std::call_once(flag, std::forward<Function>(F),
93 std::forward<Args>(ArgList)...);
95 // For other platforms we use a generic (if brittle) version based on our
// own compare-and-swap primitive (sys::CompareAndSwap) applied to flag.status.
97 sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
// Only the caller whose compare-and-swap reported Uninitialized invokes F.
98 if (old_val == Uninitialized) {
99 std::forward<Function>(F)(std::forward<Args>(ArgList)...);
// NOTE(review): the Tsan* names look like ThreadSanitizer annotation macros
// declared elsewhere -- confirm. The HappensBefore/HappensAfter pair is keyed
// on &flag.status.
101 TsanIgnoreWritesBegin();
102 TsanHappensBefore(&flag.status);
104 TsanIgnoreWritesEnd();
106 // Wait until any thread doing the call has finished.
107 sys::cas_flag tmp = flag.status;
// Keep waiting for as long as the observed status is not Done.
109 while (tmp != Done) {
114 TsanHappensAfter(&flag.status);
118 /// This tells how a thread pool will be used
119 class ThreadPoolStrategy {
121 // The default value (0) means all available threads should be used,
122 // taking the affinity mask into account. If set, this value only represents
123 // a suggested upper bound; the runtime might choose a lower value.
125 unsigned ThreadsRequested = 0;
127 // If SMT is active, use hyper threads. If false, there will be only one
128 // std::thread per core.
129 bool UseHyperThreads = true;
131 // If set, will constrain 'ThreadsRequested' to the number of hardware
132 // threads, or hardware cores.
135 /// Retrieves the max available threads for the current strategy. This
136 /// accounts for affinity masks and takes advantage of all CPU sockets.
137 unsigned compute_thread_count() const;
139 /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
140 /// multi-socket system, this ensures threads are assigned to all CPU
141 /// sockets. \p ThreadPoolNum represents a number bounded by [0,
142 /// compute_thread_count()).
143 void apply_thread_strategy(unsigned ThreadPoolNum) const;
145 /// Finds the CPU socket where a thread should go. Returns 'None' if the
146 /// thread shall remain on the actual CPU socket.
147 Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
150 /// Build a strategy from a number of threads as a string provided in \p Num.
151 /// When Num is above the max number of threads specified by the \p Default
152 /// strategy, we attempt to equally allocate the threads on all CPU sockets.
153 /// "0" or an empty string will return the \p Default strategy.
154 /// "all" for using all hardware threads.
155 Optional<ThreadPoolStrategy>
156 get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});
158 /// Returns a thread strategy for tasks requiring significant memory or other
159 /// resources. To be used for workloads where hardware_concurrency() proves to
160 /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
161 /// based on physical cores, if available for the host system, otherwise falls
162 /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
163 /// LLVM_ENABLE_THREADS = OFF.
164 inline ThreadPoolStrategy
165 heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
166 ThreadPoolStrategy S;
// With UseHyperThreads cleared there is only one std::thread per core, even
// when SMT is active.
167 S.UseHyperThreads = false;
// 0 (the default) asks for all available threads; any other value is only a
// suggested upper bound.
168 S.ThreadsRequested = ThreadCount;
172 /// Like heavyweight_hardware_concurrency() above, but builds a strategy
173 /// based on the rules described for get_threadpool_strategy().
174 /// If \p Num is invalid, returns a default strategy where one thread per
175 /// hardware core is used.
176 inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
// Parse \p Num, passing the default heavyweight strategy as the one that
// get_threadpool_strategy() returns for "0" or an empty string.
177 Optional<ThreadPoolStrategy> S =
178 get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
// Fallback result: the default heavyweight strategy (one thread per core).
181 return heavyweight_hardware_concurrency();
184 /// Returns a default thread strategy where all available hardware resources
185 /// are to be used, except for those initially excluded by an affinity mask.
186 /// This function takes affinity into consideration. Returns 1 when LLVM is
187 /// configured with LLVM_ENABLE_THREADS=OFF.
188 inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
189 ThreadPoolStrategy S;
// Only the requested thread count is overridden in the visible lines; 0 (the
// default) asks for all available threads.
190 S.ThreadsRequested = ThreadCount;
194 /// Returns an optimal thread strategy to execute specified amount of tasks.
195 /// This strategy should prevent us from creating too many threads if we
196 /// occasionally have an unexpectedly small amount of tasks.
197 inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
198 ThreadPoolStrategy S;
// The number of tasks is used directly as the requested thread count.
200 S.ThreadsRequested = TaskCount;
204 /// Return the current thread id, as used in various OS system calls.
205 /// Note that not all platforms guarantee that the value returned will be
206 /// unique across the entire system, so portable code should not assume this.
208 uint64_t get_threadid();
210 /// Get the maximum length of a thread name on this platform.
211 /// A value of 0 means there is no limit.
212 uint32_t get_max_thread_name_length();
214 /// Set the name of the current thread. Setting a thread's name can
215 /// be helpful for enabling useful diagnostics under a debugger or when
216 /// logging. The level of support for setting a thread's name varies
217 /// wildly across operating systems, and we only make a best effort to
218 /// perform the operation on supported platforms. No indication of success
219 /// or failure is returned.
220 void set_thread_name(const Twine &Name);
222 /// Get the name of the current thread. The level of support for
223 /// getting a thread's name varies wildly across operating systems, and it
224 /// is not even guaranteed that if you can successfully set a thread's name
225 /// that you can later get it back. This function is intended for diagnostic
226 /// purposes, and as with setting a thread's name no indication of whether
227 /// the operation succeeded or failed is returned.
228 void get_thread_name(SmallVectorImpl<char> &Name);
230 /// Returns a mask that represents on which hardware thread, core, CPU, NUMA
231 /// group, the calling thread can be executed. On Windows, threads cannot
232 /// cross CPU sockets boundaries.
233 llvm::BitVector get_thread_affinity_mask();
235 /// Returns how many physical CPUs or NUMA groups the system has.
238 enum class ThreadPriority {
242 /// If priority is Background, tries to lower the current thread's priority such
243 /// that it does not affect foreground tasks significantly. Can be used for
244 /// long-running, latency-insensitive tasks to make sure the CPU is not hogged by this task.
246 /// If the priority is default, tries to restore the current thread's priority to
247 /// default scheduling priority.
248 enum class SetThreadPriorityResult { FAILURE, SUCCESS };
249 SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);