/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    // hwloc uses -1 as the "no more set bits" sentinel, so end() is -1 here.
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
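    // Usage sketch (illustrative; the runtime's actual iteration macro lives
    // elsewhere): the begin/end/next triple is meant to be walked as
    //   for (int i = m->begin(); i != m->end(); i = m->next(i)) { ... }
    // Note the '!=' comparison: this hwloc mask reports end() == -1, while
    // the native masks later in this header report end() == total bit count.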
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits, so each 64-bit
        // processor group spans two ulongs of the hwloc bitmap.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1; // bits set in more than one group
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
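// Note (assumption based on the runtime's documented settings, not on this
// header alone): the hwloc backend above is compiled in only when libomp is
// built with hwloc support, and topology discovery can be steered toward it
// at runtime via KMP_TOPOLOGY_METHOD=hwloc.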
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change. Stone-age compilers and sys headers force us to use syscall()
   directly. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
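// Illustrative sketch (assumes glibc's syscall(2) wrapper; error handling
// elided): with the constants above, the raw calls made by the Mask class
// below look like
//   syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
//   syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
// where pid 0 means "the calling thread" and mask is a byte array of
// __kmp_affin_mask_size bytes in the kernel's cpu_set_t layout.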
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
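    // Worked example: mask_t is unsigned char, so BITS_PER_MASK_T == 8 and
    // set(10) touches byte 10 / 8 == 1, bit 10 % 8 == 2, i.e. mask[1] |= 0x04.
    // is_set() and clear() use the same byte/bit index arithmetic.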
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    // begin()/next() return the index of the next set bit at or after the
    // starting position, or end() when the mask is exhausted.
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask: the set bits may span only one processor
        // group.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
            (ga.Mask == 0))
          return -1;
        mask[ga.Group] = ga.Mask;
      } else {
        // There is no "get thread affinity" API, so read the current mask by
        // setting the thread's affinity to the process mask (which returns
        // the previous thread mask) and then restoring it.
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1; // bits set in more than one group
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  // Two addresses are "close" if they agree on all labels above the given
  // level, i.e. they share an ancestor at that level.
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
// qsort() comparator: orders AddrUnsPairs lexicographically by their Address
// labels; used below to sort threads by physical ID.
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
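/* Illustrative example (derived from the defaults below, not additional
   machinery): with no affinity information, maxLeaves = 4 and minBranch = 4
   give a 4-ary tree, so 16 threads form 16 leaves -> 4 groups of 4 -> 1 root,
   much like the hyper barrier's layout. */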
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
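  /* Worked example (numbers from the comment above): for numPerLevel =
     {2, 4, 4, 1, 1}, init() computes skipPerLevel = {1, 2, 8, 32, 32} via
     skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]; skipPerLevel[i]
     is thus the stride in leaf (thread) ids between adjacent subtrees rooted
     at level i. */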
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread
       that uses the OpenMP library) */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1) // if no level below this one, add one
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H