/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
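    // Iteration sketch: hwloc_bitmap_next() returns -1 once no set bit
    // remains, which is why end() is -1 for this implementation:
    //   for (int i = m->begin(); i != m->end(); i = m->next(i))
    //     /* visit processor i */;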
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
#if KMP_OS_WINDOWS
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
      return group;
    }
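    // Worked example (illustrative): with __kmp_num_proc_groups == 2, a mask
    // whose only set bits lie in positions 64..127 reports group 1; set bits
    // spanning both groups report -1, since a thread can be bound to only one
    // processor group at a time.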
#endif /* KMP_OS_WINDOWS */
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // Enables affinity according to the KMP_AFFINITY_CAPABLE() macro.
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // Indicate that hwloc didn't work and disable affinity.
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
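// Usage sketch (illustrative, not part of this header): the runtime picks one
// KMPAffinity implementation and drives it through the common interface:
//   KMPAffinity *affinity = new KMPHwlocAffinity(); // or KMPNativeAffinity
//   affinity->determine_capable("KMP_AFFINITY");
//   KMPAffinity::Mask *mask = affinity->allocate_mask();
//   mask->zero();
//   mask->set(0);
//   mask->set_system_affinity(/*abort_on_error=*/true);
//   affinity->deallocate_mask(mask);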
#if KMP_OS_LINUX || KMP_OS_FREEBSD
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
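// These numbers let the Linux mask class below invoke the affinity system
// calls directly, e.g.
//   syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
// even when an old distribution's libc headers do not define them.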
#if KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
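    // Worked example (illustrative): with the 8-bit mask_t used here,
    // processor 13 maps to mask[13 / 8] == mask[1], bit 13 % 8 == 5, so
    // set(13) ORs in (mask_t)1 << 5.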
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // SetThreadAffinityMask returns the thread's previous mask, so
        // temporarily binding to the process mask is a way to read the
        // current thread mask; the second call below restores it.
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
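// Worked example (illustrative): labels compare lexicographically, so
// {0, 1, 0} sorts before {0, 1, 1}, which sorts before {1, 0, 0}; qsort with
// this comparator places threads sharing the outer topology levels (e.g. the
// same package) next to each other.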
/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
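  // Worked example (illustrative): for numPerLevel = {2, 4, 4, 1, 1},
  // skipPerLevel[0] = 1 and skipPerLevel[i] = numPerLevel[i-1] *
  // skipPerLevel[i-1], giving {1, 2, 8, 32, 32}: the stride, in leaf threads,
  // between consecutive nodes at each level.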
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] and skipPerLevel[*] to 1
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }
    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
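  // Worked example (illustrative): init(NULL, 16) with no affinity table sets
  // numPerLevel = {4, 4, 1, 1, 1, 1, 1} (maxLeaves = 4 leaves per node),
  // giving depth = 3 and skipPerLevel = {1, 4, 16, 32, 64, 128, 256}; the
  // entries at and past index depth double capacity for oversubscription.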
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize
    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;
      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }
    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
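    // Worked example (illustrative): growing from base_num_threads = 16
    // (old_sz = 16) to nproc = 100 doubles old_sz to 32, 64, 128, adding one
    // level per doubling, so the tree's capacity always reaches nproc.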
    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H