/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
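    // Iteration sketch: hwloc_bitmap_next() returns -1 once no set bit
    // remains, which is why end() is -1 for this implementation:
    //   for (int i = m->begin(); i != m->end(); i = m->next(i))
    //     /* visit processor i */;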
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
#if KMP_OS_WINDOWS
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
      return group;
    }
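    // Worked example (illustrative): with __kmp_num_proc_groups == 2, a mask
    // whose only set bits lie in positions 64..127 reports group 1; set bits
    // spanning both groups report -1, since a thread can be bound to only one
    // processor group at a time.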
#endif /* KMP_OS_WINDOWS */
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // Enables affinity according to the KMP_AFFINITY_CAPABLE() macro.
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // Indicate that hwloc didn't work and disable affinity.
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
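// Usage sketch (illustrative, not part of this header): the runtime picks one
// KMPAffinity implementation and drives it through the common interface:
//   KMPAffinity *affinity = new KMPHwlocAffinity(); // or KMPNativeAffinity
//   affinity->determine_capable("KMP_AFFINITY");
//   KMPAffinity::Mask *mask = affinity->allocate_mask();
//   mask->zero();
//   mask->set(0);
//   mask->set_system_affinity(/*abort_on_error=*/true);
//   affinity->deallocate_mask(mask);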
#if KMP_OS_LINUX || KMP_OS_FREEBSD
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
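// These numbers let the Linux mask class below invoke the affinity system
// calls directly, e.g.
//   syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
// even when an old distribution's libc headers do not define them.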
#if KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
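    // Worked example (illustrative): with the 8-bit mask_t used here,
    // processor 13 maps to mask[13 / 8] == mask[1], bit 13 % 8 == 5, so
    // set(13) ORs in (mask_t)1 << 5.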
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // SetThreadAffinityMask returns the thread's previous mask, so
        // temporarily binding to the process mask is a way to read the
        // current thread mask; the second call below restores it.
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
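// Worked example (illustrative): labels compare lexicographically, so
// {0, 1, 0} sorts before {0, 1, 1}, which sorts before {1, 0, 0}; qsort with
// this comparator places threads sharing the outer topology levels (e.g. the
// same package) next to each other.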
/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
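  // Worked example (illustrative): for numPerLevel = {2, 4, 4, 1, 1},
  // skipPerLevel[0] = 1 and skipPerLevel[i] = numPerLevel[i-1] *
  // skipPerLevel[i-1], giving {1, 2, 8, 32, 32}: the stride, in leaf threads,
  // between consecutive nodes at each level.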
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] and skipPerLevel[*] to 1
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }
    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
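  // Worked example (illustrative): init(NULL, 16) with no affinity table sets
  // numPerLevel = {4, 4, 1, 1, 1, 1, 1} (maxLeaves = 4 leaves per node),
  // giving depth = 3 and skipPerLevel = {1, 4, 16, 32, 64, 128, 256}; the
  // entries at and past index depth double capacity for oversubscription.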
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize
    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;
      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }
    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
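    // Worked example (illustrative): growing from base_num_threads = 16
    // (old_sz = 16) to nproc = 100 doubles old_sz to 32, 64, 128, adding one
    // level per doubling, so the tree's capacity always reaches nproc.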
    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H