1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is based on LLVM's lib/Support/Host.cpp.
10 // It implements the operating system Host concept and builtin
11 // __cpu_model for the compiler_rt library, for x86 only.
13 //===----------------------------------------------------------------------===//
15 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
17 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
29 #ifndef __has_attribute
30 #define __has_attribute(attr) 0
33 enum VendorSignatures {
34 SIG_INTEL = 0x756e6547, // Genu
35 SIG_AMD = 0x68747541, // Auth
38 enum ProcessorVendors {
63 enum ProcessorSubtypes {
64 INTEL_COREI7_NEHALEM = 1,
65 INTEL_COREI7_WESTMERE,
66 INTEL_COREI7_SANDYBRIDGE,
75 INTEL_COREI7_IVYBRIDGE,
77 INTEL_COREI7_BROADWELL,
79 INTEL_COREI7_SKYLAKE_AVX512,
80 INTEL_COREI7_CANNONLAKE,
81 INTEL_COREI7_ICELAKE_CLIENT,
82 INTEL_COREI7_ICELAKE_SERVER,
84 INTEL_COREI7_CASCADELAKE,
85 INTEL_COREI7_TIGERLAKE,
86 INTEL_COREI7_COOPERLAKE,
90 enum ProcessorFeatures {
119 FEATURE_AVX5124VNNIW,
120 FEATURE_AVX5124FMAPS,
121 FEATURE_AVX512VPOPCNTDQ,
126 FEATURE_AVX512BITALG,
128 FEATURE_AVX512VP2INTERSECT,
132 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
133 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
134 // support. Consequently, for i386, the presence of CPUID is checked first
135 // via the corresponding eflags bit.
136 static bool isCpuIdSupported() {
137 #if defined(__GNUC__) || defined(__clang__)
138 #if defined(__i386__)
139 int __cpuid_supported;
142 " movl %%eax,%%ecx\n"
143 " xorl $0x00200000,%%eax\n"
149 " cmpl %%eax,%%ecx\n"
153 : "=r"(__cpuid_supported)
156 if (!__cpuid_supported)
164 // This code is copied from lib/Support/Host.cpp.
165 // Changes to either file should be mirrored in the other.
167 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
168 /// the specified arguments. If we can't run cpuid on the host, return true.
169 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
170 unsigned *rECX, unsigned *rEDX) {
171 #if defined(__GNUC__) || defined(__clang__)
172 #if defined(__x86_64__)
173 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
174 // FIXME: should we save this for Clang?
175 __asm__("movq\t%%rbx, %%rsi\n\t"
177 "xchgq\t%%rbx, %%rsi\n\t"
178 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
181 #elif defined(__i386__)
182 __asm__("movl\t%%ebx, %%esi\n\t"
184 "xchgl\t%%ebx, %%esi\n\t"
185 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
191 #elif defined(_MSC_VER)
192 // The MSVC intrinsic is portable across x86 and x64.
194 __cpuid(registers, value);
195 *rEAX = registers[0];
196 *rEBX = registers[1];
197 *rECX = registers[2];
198 *rEDX = registers[3];
205 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
206 /// the 4 values in the specified arguments. If we can't run cpuid on the host,
208 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
209 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
211 #if defined(__GNUC__) || defined(__clang__)
212 #if defined(__x86_64__)
213 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
214 // FIXME: should we save this for Clang?
215 __asm__("movq\t%%rbx, %%rsi\n\t"
217 "xchgq\t%%rbx, %%rsi\n\t"
218 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
219 : "a"(value), "c"(subleaf));
221 #elif defined(__i386__)
222 __asm__("movl\t%%ebx, %%esi\n\t"
224 "xchgl\t%%ebx, %%esi\n\t"
225 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
226 : "a"(value), "c"(subleaf));
231 #elif defined(_MSC_VER)
233 __cpuidex(registers, value, subleaf);
234 *rEAX = registers[0];
235 *rEBX = registers[1];
236 *rECX = registers[2];
237 *rEDX = registers[3];
// Read extended control register 0 (XCR0). Used to detect features such as AVX.
245 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
246 #if defined(__GNUC__) || defined(__clang__)
247 // Check xgetbv; this uses a .byte sequence instead of the instruction
248 // directly because older assemblers do not include support for xgetbv and
249 // there is no easy way to conditionally compile based on the assembler used.
250 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
252 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
253 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
255 *rEDX = Result >> 32;
262 static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
264 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
265 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7
266 if (*Family == 6 || *Family == 0xf) {
268 // Examine extended family ID if family ID is F.
269 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
270 // Examine extended model ID if family ID is 6 or F.
271 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
276 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
277 const unsigned *Features,
278 unsigned *Type, unsigned *Subtype) {
// Evaluates to nonzero when feature bit F is set in the Features bit vector,
// which packs 32 features per word: F selects word F / 32 and bit F % 32.
// The mask must be a single shifted bit; masking with the raw index
// (F % 32) would test unrelated bits and always fail for indices that are
// multiples of 32. The argument and the whole expansion are parenthesized so
// the macro composes safely with operators such as '!' and '&&'.
#define testFeature(F)                                                         \
  ((Features[(F) / 32] & (1U << ((F) % 32))) != 0)
282 // We select CPU strings to match the code in Host.cpp, but we don't use them
289 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
290 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
291 // mobile processor, Intel Core 2 Extreme processor, Intel
292 // Pentium Dual-Core processor, Intel Xeon processor, model
293 // 0Fh. All processors are manufactured using the 65 nm process.
294 case 0x16: // Intel Celeron processor model 16h. All processors are
295 // manufactured using the 65 nm process
299 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
300 // 17h. All processors are manufactured using the 45 nm process.
302 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
303 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
304 // the 45 nm process.
308 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
309 // processors are manufactured using the 45 nm process.
310 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
311 // As found in a Summer 2010 model iMac.
313 case 0x2e: // Nehalem EX
315 *Type = INTEL_COREI7;
316 *Subtype = INTEL_COREI7_NEHALEM;
318 case 0x25: // Intel Core i7, laptop version.
319 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
320 // processors are manufactured using the 32 nm process.
321 case 0x2f: // Westmere EX
323 *Type = INTEL_COREI7;
324 *Subtype = INTEL_COREI7_WESTMERE;
326 case 0x2a: // Intel Core i7 processor. All processors are manufactured
327 // using the 32 nm process.
330 *Type = INTEL_COREI7;
331 *Subtype = INTEL_COREI7_SANDYBRIDGE;
334 case 0x3e: // Ivy Bridge EP
336 *Type = INTEL_COREI7;
337 *Subtype = INTEL_COREI7_IVYBRIDGE;
346 *Type = INTEL_COREI7;
347 *Subtype = INTEL_COREI7_HASWELL;
356 *Type = INTEL_COREI7;
357 *Subtype = INTEL_COREI7_BROADWELL;
361 case 0x4e: // Skylake mobile
362 case 0x5e: // Skylake desktop
363 case 0x8e: // Kaby Lake mobile
364 case 0x9e: // Kaby Lake desktop
365 case 0xa5: // Comet Lake-H/S
366 case 0xa6: // Comet Lake-U
368 *Type = INTEL_COREI7;
369 *Subtype = INTEL_COREI7_SKYLAKE;
374 *Type = INTEL_COREI7;
375 if (testFeature(FEATURE_AVX512BF16)) {
377 *Subtype = INTEL_COREI7_COOPERLAKE;
378 } else if (testFeature(FEATURE_AVX512VNNI)) {
380 *Subtype = INTEL_COREI7_CASCADELAKE;
382 CPU = "skylake-avx512";
383 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
390 *Type = INTEL_COREI7;
391 *Subtype = INTEL_COREI7_CANNONLAKE;
397 CPU = "icelake-client";
398 *Type = INTEL_COREI7;
399 *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
405 CPU = "icelake-server";
406 *Type = INTEL_COREI7;
407 *Subtype = INTEL_COREI7_ICELAKE_SERVER;
410 case 0x1c: // Most 45 nm Intel Atom processors
411 case 0x26: // 45 nm Atom Lincroft
412 case 0x27: // 32 nm Atom Medfield
413 case 0x35: // 32 nm Atom Midview
414 case 0x36: // 32 nm Atom Midview
416 *Type = INTEL_BONNELL;
419 // Atom Silvermont codes from the Intel software optimization guide.
425 case 0x4c: // really airmont
427 *Type = INTEL_SILVERMONT;
430 case 0x5c: // Apollo Lake
431 case 0x5f: // Denverton
433 *Type = INTEL_GOLDMONT;
436 CPU = "goldmont-plus";
437 *Type = INTEL_GOLDMONT_PLUS;
441 *Type = INTEL_TREMONT;
454 default: // Unknown family 6 CPU.
466 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
467 const unsigned *Features,
468 unsigned *Type, unsigned *Subtype) {
469 // We select CPU strings to match the code in Host.cpp, but we don't use them
479 *Subtype = AMDFAM10H_BARCELONA;
482 *Subtype = AMDFAM10H_SHANGHAI;
485 *Subtype = AMDFAM10H_ISTANBUL;
496 if (Model >= 0x60 && Model <= 0x7f) {
498 *Subtype = AMDFAM15H_BDVER4;
499 break; // 60h-7Fh: Excavator
501 if (Model >= 0x30 && Model <= 0x3f) {
503 *Subtype = AMDFAM15H_BDVER3;
504 break; // 30h-3Fh: Steamroller
506 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
508 *Subtype = AMDFAM15H_BDVER2;
509 break; // 02h, 10h-1Fh: Piledriver
512 *Subtype = AMDFAM15H_BDVER1;
513 break; // 00h-0Fh: Bulldozer
523 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
525 *Subtype = AMDFAM17H_ZNVER2;
526 break; // 30h-3fh, 71h: Zen2
529 *Subtype = AMDFAM17H_ZNVER1;
530 break; // 00h-0Fh: Zen1
534 break; // Unknown AMD CPU.
540 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
541 unsigned *Features) {
// Sets feature bit F in the Features bit vector: word F / 32, bit F % 32.
// The argument and the whole expansion are parenthesized so the macro stays
// correct for compound arguments (e.g. BASE + 1) and inside larger
// expressions; it remains a plain expression, so `setFeature(X);` statements
// keep working unchanged.
#define setFeature(F)                                                          \
  (Features[(F) / 32] |= 1U << ((F) % 32))
548 setFeature(FEATURE_CMOV);
550 setFeature(FEATURE_MMX);
552 setFeature(FEATURE_SSE);
554 setFeature(FEATURE_SSE2);
557 setFeature(FEATURE_SSE3);
559 setFeature(FEATURE_PCLMUL);
561 setFeature(FEATURE_SSSE3);
563 setFeature(FEATURE_FMA);
565 setFeature(FEATURE_SSE4_1);
567 setFeature(FEATURE_SSE4_2);
569 setFeature(FEATURE_POPCNT);
571 setFeature(FEATURE_AES);
573 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
574 // indicates that the AVX registers will be saved and restored on context
575 // switch, then we have full AVX support.
576 const unsigned AVXBits = (1 << 27) | (1 << 28);
577 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
578 ((EAX & 0x6) == 0x6);
579 #if defined(__APPLE__)
580 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
// save the AVX512 context if we use AVX512 instructions, even if the bit is not
583 bool HasAVX512Save = true;
585 // AVX512 requires additional context to be saved by the OS.
586 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
590 setFeature(FEATURE_AVX);
593 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
595 if (HasLeaf7 && ((EBX >> 3) & 1))
596 setFeature(FEATURE_BMI);
597 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
598 setFeature(FEATURE_AVX2);
599 if (HasLeaf7 && ((EBX >> 8) & 1))
600 setFeature(FEATURE_BMI2);
601 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
602 setFeature(FEATURE_AVX512F);
603 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
604 setFeature(FEATURE_AVX512DQ);
605 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
606 setFeature(FEATURE_AVX512IFMA);
607 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
608 setFeature(FEATURE_AVX512PF);
609 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
610 setFeature(FEATURE_AVX512ER);
611 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
612 setFeature(FEATURE_AVX512CD);
613 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
614 setFeature(FEATURE_AVX512BW);
615 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
616 setFeature(FEATURE_AVX512VL);
618 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
619 setFeature(FEATURE_AVX512VBMI);
620 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
621 setFeature(FEATURE_AVX512VBMI2);
622 if (HasLeaf7 && ((ECX >> 8) & 1))
623 setFeature(FEATURE_GFNI);
624 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
625 setFeature(FEATURE_VPCLMULQDQ);
626 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
627 setFeature(FEATURE_AVX512VNNI);
628 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
629 setFeature(FEATURE_AVX512BITALG);
630 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
631 setFeature(FEATURE_AVX512VPOPCNTDQ);
633 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
634 setFeature(FEATURE_AVX5124VNNIW);
635 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
636 setFeature(FEATURE_AVX5124FMAPS);
637 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
638 setFeature(FEATURE_AVX512VP2INTERSECT);
640 bool HasLeaf7Subleaf1 =
641 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
642 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
643 setFeature(FEATURE_AVX512BF16);
645 unsigned MaxExtLevel;
646 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
648 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
649 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
650 if (HasExtLeaf1 && ((ECX >> 6) & 1))
651 setFeature(FEATURE_SSE4_A);
652 if (HasExtLeaf1 && ((ECX >> 11) & 1))
653 setFeature(FEATURE_XOP);
654 if (HasExtLeaf1 && ((ECX >> 16) & 1))
655 setFeature(FEATURE_FMA4);
659 #if defined(HAVE_INIT_PRIORITY)
// The constructor priority is an attribute argument and must be written in
// parentheses. 101 is the lowest priority not reserved for the
// implementation, so the initializer runs ahead of ordinary constructors.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
661 #elif __has_attribute(__constructor__)
662 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
664 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
665 // this runs during initialization.
666 #define CONSTRUCTOR_ATTRIBUTE
670 __attribute__((visibility("hidden")))
// Forward declaration of the CPU-model initializer. CONSTRUCTOR_ATTRIBUTE
// expands to a constructor attribute where the compiler provides one and to
// nothing otherwise, so the attribute is attached here, ahead of the
// definition.
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
675 __attribute__((visibility("hidden")))
// CPU identification record populated by __cpu_indicator_init. The type and
// the object are declared together on purpose: the attribute line preceding
// this declaration applies to the __cpu_model object.
// NOTE(review): field order and sizes are presumably relied on by code
// generated for the __cpu_model builtin -- do not reorder or resize.
struct __processor_model {
  unsigned int __cpu_vendor;      // tested by __cpu_indicator_init to skip re-init
  unsigned int __cpu_type;        // written via getIntel/AMDProcessorTypeAndSubtype
  unsigned int __cpu_subtype;     // written via getIntel/AMDProcessorTypeAndSubtype
  unsigned int __cpu_features[1]; // receives Features[0], the first feature word
} __cpu_model = {
    .__cpu_vendor = 0,
    .__cpu_type = 0,
    .__cpu_subtype = 0,
    .__cpu_features = {0},
};
685 __attribute__((visibility("hidden")))
// Second 32-bit word of the feature bitmap; __cpu_indicator_init stores
// Features[1] here, alongside __cpu_model.__cpu_features[0].
unsigned int __cpu_features2 = 0;
// A constructor function that sets __cpu_model and __cpu_features2 with
690 // the right values. This needs to run only once. This constructor is
691 // given the highest priority and it should run before constructors without
692 // the priority set. However, it still runs after ifunc initializers and
693 // needs to be called explicitly there.
695 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
696 unsigned EAX, EBX, ECX, EDX;
697 unsigned MaxLeaf = 5;
699 unsigned Model, Family;
700 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
702 // This function needs to run just once.
703 if (__cpu_model.__cpu_vendor)
706 if (!isCpuIdSupported() ||
707 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
708 __cpu_model.__cpu_vendor = VENDOR_OTHER;
712 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
713 detectX86FamilyModel(EAX, &Family, &Model);
715 // Find available features.
716 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
718 assert((sizeof(Features)/sizeof(Features[0])) == 2);
719 __cpu_model.__cpu_features[0] = Features[0];
720 __cpu_features2 = Features[1];
722 if (Vendor == SIG_INTEL) {
724 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
725 &(__cpu_model.__cpu_type),
726 &(__cpu_model.__cpu_subtype));
727 __cpu_model.__cpu_vendor = VENDOR_INTEL;
728 } else if (Vendor == SIG_AMD) {
730 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
731 &(__cpu_model.__cpu_type),
732 &(__cpu_model.__cpu_subtype));
733 __cpu_model.__cpu_vendor = VENDOR_AMD;
735 __cpu_model.__cpu_vendor = VENDOR_OTHER;
737 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
738 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
739 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);