/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1; // bits set in more than one group
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
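      // Editorial illustration (not upstream code): hwloc exposes the mask as
      // 32-bit ulongs, so 64-CPU group i occupies ulong indices 2*i and
      // 2*i+1. A mask with only CPU 70 set has its bit in ulong index 2
      // (bit 6), i.e. in processor group 1.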
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates the ability to
    // discover processing units.) And finally, were there no errors when
    // calling any hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
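
// Editorial note: the runtime issues these system calls directly, e.g.
//   syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
// in KMPNativeAffinity::Mask below, where a pid of 0 addresses the calling
// thread.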
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
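    // Editorial illustration: with mask_t = unsigned char, BITS_PER_MASK_T is
    // 8, so set(11) sets bit 3 of mask[1] (11 / 8 == 1, 11 % 8 == 3).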
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */
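
// Editorial note: the runtime is expected to select exactly one of these
// implementations at startup (KMPHwlocAffinity when hwloc is available,
// otherwise the native OS class for the platform) and to reach it through a
// KMPAffinity base pointer, so the mask operations above are used via
// virtual dispatch.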

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        *mask = retval;
      }
      return 0;
    }
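    // Editorial note: the single-group path above recovers the current thread
    // affinity by calling SetThreadAffinityMask twice; the first call returns
    // the previous mask and the second call restores it.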
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1; // bits set in more than one group
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    printf("Depth: %u --- ", depth);
    for (unsigned i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
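
// Editorial illustration (hypothetical values): on a 2-package machine with
// 4 cores per package and 2 threads per core, a hardware thread might carry
// labels {1, 3, 0}, i.e. package 1, core 3, thread context 0;
// __kmp_affinity_cmp_Address_labels below orders such label vectors
// lexicographically, outermost level first.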
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (unsigned i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to
      any leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
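  // Editorial illustration: for numPerLevel = {2, 4, 4, 1, 1} (depth 4),
  // init() below computes skipPerLevel = {1, 2, 8, 32, ...}, since
  // skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]; it is the stride
  // between consecutive subtree roots at level i, and entries at index >=
  // depth keep doubling to absorb oversubscription.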

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
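  // Editorial illustration: init(NULL, 16) takes the default path above and
  // yields numPerLevel = {4, 4, 1, ...} (depth 3) and skipPerLevel =
  // {1, 4, 16, 32, 64, ...}, i.e. a 4-ary tree over 16 threads whose capacity
  // doubles at each oversubscription level.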

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};

#endif // KMP_AFFINITY_H