1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is based on LLVM's lib/Support/Host.cpp.
11 // It implements the operating system Host concept and builtin
12 // __cpu_model for the compiler_rt library, for x86 only.
14 //===----------------------------------------------------------------------===//
// Compile the contents of this file only for x86/x86-64 hosts built with
// GCC, Clang, or MSVC.
#if (defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64__) || defined(_M_X64)) && \
    (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))

// Fallback so the "#elif __has_attribute(...)" test further down compiles on
// compilers that lack the __has_attribute extension.
#ifndef __has_attribute
#define __has_attribute(attr) 0
// NOTE(review): the matching #endif for this #ifndef is missing from this
// copy of the file — confirm against the upstream source.
// CPUID leaf-0 vendor-id signatures: the value CPUID(0) leaves in EBX.
// "Genu"ineIntel / "Auth"enticAMD — first four ASCII bytes, little-endian.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, /* Genu */
  SIG_AMD = 0x68747541    /* Auth */
};
39 enum ProcessorVendors {
// Subtype codes stored in __cpu_model.__cpu_subtype.  These values appear to
// be an ABI shared with the compiler's __builtin_cpu_is lowering, so existing
// enumerators must never be renumbered or reordered — confirm against libgcc.
// NOTE(review): this copy is truncated — enumerators referenced later in this
// file are missing here (e.g. INTEL_COREI7_HASWELL, INTEL_COREI7_SKYLAKE, the
// AMDFAM10H_*/AMDFAM15H_BDVER*/AMDFAM17H_ZNVER1 values, and the trailing
// CPU_SUBTYPE_MAX asserted at the end of the file), as is the closing "};".
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
// Feature bit positions used with setFeature() below; per that macro, values
// below 32 land in __cpu_model.__cpu_features[0] and values >= 32 land in
// __cpu_features2.
// NOTE(review): truncated in this copy — the enumerators before
// FEATURE_AVX5124VNNIW (FEATURE_CMOV, FEATURE_SSE*, FEATURE_AVX512* etc.,
// all referenced in getAvailableFeatures) and the closing "};" are missing.
enum ProcessorFeatures {
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Returns true when the CPUID instruction may be executed safely.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  // CPUID exists iff bit 21 (ID) of EFLAGS can be toggled by software.
  int __cpuid_supported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  // x86-64 always has CPUID.
  return true;
#else
  // MSVC targets: CPUID is assumed present.
  return true;
#endif
}
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
// Read control register 0 (XCR0). Used to detect features such as AVX.
// Returns true on failure (no XGETBV available on this toolchain/target).
// Callers must first verify CPUID reports OSXSAVE before invoking XGETBV.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
// Decode the display Family/Model from CPUID leaf 1 EAX, folding in the
// extended family (bits 20-27) when family == 0xF and the extended model
// (bits 16-19) when family is 0x6 or 0xF, per Intel's CPUID convention.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
// Map an Intel CPU's (Family, Model) — plus Brand_id and Features — to the
// __cpu_model type/subtype codes.
//
// NOTE(review): this copy of the function is truncated.  Missing relative to
// what the surviving lines imply: the leading "static void" on the signature,
// the "switch" dispatch header(s) the case labels below belong to, several
// case labels (e.g. the Haswell/Broadwell, Skylake-AVX512, Cannonlake,
// Silvermont, Goldmont-Plus, KNL and KNM model numbers), most "break;"
// statements, and the closing braces.  The surviving lines are kept
// byte-for-byte; only comments have been added.
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                unsigned Brand_id, unsigned Features,
                                unsigned *Type, unsigned *Subtype) {
  // --- Core 2 era (65 nm / 45 nm) -> INTEL_CORE2 ---------------------------
  case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
             // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
             // mobile processor, Intel Core 2 Extreme processor, Intel
             // Pentium Dual-Core processor, Intel Xeon processor, model
             // 0Fh. All processors are manufactured using the 65 nm process.
  case 0x16: // Intel Celeron processor model 16h. All processors are
             // manufactured using the 65 nm process
  case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
             // 17h. All processors are manufactured using the 45 nm process.
             // 45nm: Penryn , Wolfdale, Yorkfield (XE)
  case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
             // the 45 nm process.
    *Type = INTEL_CORE2; // "penryn"
    // NOTE(review): "break;" missing here in this copy.
  case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
             // processors are manufactured using the 45 nm process.
  case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
             // As found in a Summer 2010 model iMac.
  case 0x2e: // Nehalem EX
    *Type = INTEL_COREI7; // "nehalem"
    *Subtype = INTEL_COREI7_NEHALEM;
    // NOTE(review): "break;" missing here in this copy.
  case 0x25: // Intel Core i7, laptop version.
  case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
             // processors are manufactured using the 32 nm process.
  case 0x2f: // Westmere EX
    *Type = INTEL_COREI7; // "westmere"
    *Subtype = INTEL_COREI7_WESTMERE;
    // NOTE(review): "break;" missing here in this copy.
  case 0x2a: // Intel Core i7 processor. All processors are manufactured
             // using the 32 nm process.
    *Type = INTEL_COREI7; //"sandybridge"
    *Subtype = INTEL_COREI7_SANDYBRIDGE;
    // NOTE(review): "break;" (and presumably a Sandy Bridge EP case label)
    // missing here in this copy.
  case 0x3e: // Ivy Bridge EP
    *Type = INTEL_COREI7; // "ivybridge"
    *Subtype = INTEL_COREI7_IVYBRIDGE;
    // NOTE(review): "break;" and the Haswell case labels missing here.
    *Type = INTEL_COREI7; // "haswell"
    *Subtype = INTEL_COREI7_HASWELL;
    // NOTE(review): "break;" and the Broadwell case labels missing here.
    *Type = INTEL_COREI7; // "broadwell"
    *Subtype = INTEL_COREI7_BROADWELL;
    // NOTE(review): "break;" missing here in this copy.
  case 0x4e: // Skylake mobile
  case 0x5e: // Skylake desktop
  case 0x8e: // Kaby Lake mobile
  case 0x9e: // Kaby Lake desktop
    *Type = INTEL_COREI7; // "skylake"
    *Subtype = INTEL_COREI7_SKYLAKE;
    // NOTE(review): "break;" and the Skylake-server case label missing here.
    *Type = INTEL_COREI7;
    *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
    // NOTE(review): "break;" and the Cannonlake case label missing here.
    *Type = INTEL_COREI7;
    *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
    // NOTE(review): "break;" missing here in this copy.
  case 0x1c: // Most 45 nm Intel Atom processors
  case 0x26: // 45 nm Atom Lincroft
  case 0x27: // 32 nm Atom Medfield
  case 0x35: // 32 nm Atom Midview
  case 0x36: // 32 nm Atom Midview
    *Type = INTEL_BONNELL;
    // NOTE(review): "break;" missing here in this copy.
  // Atom Silvermont codes from the Intel software optimization guide.
  // NOTE(review): the Silvermont case labels appear to be missing above the
  // airmont alias below.
  case 0x4c: // really airmont
    *Type = INTEL_SILVERMONT;
    break; // "silvermont"
  case 0x5c: // Apollo Lake
  case 0x5f: // Denverton
    *Type = INTEL_GOLDMONT;
    // NOTE(review): "break;" and the Goldmont-Plus case label missing here.
    *Type = INTEL_GOLDMONT_PLUS;
    // NOTE(review): "break;" and the KNL case label missing here.
    *Type = INTEL_KNL; // knl
    // NOTE(review): "break;" and the KNM case label missing here.
    *Type = INTEL_KNM; // knm
    // NOTE(review): "break;" missing here in this copy.
  default: // Unknown family 6 CPU.
  // NOTE(review): the default-case body and the closing braces of the
  // switch(es) and of the function are missing past this point in this copy.
// Map an AMD CPU's (Family, Model) to __cpu_model type/subtype codes.
//
// NOTE(review): truncated in this copy — the end of the signature
// ("unsigned *Subtype) {" is implied by the call site at the bottom of the
// file), the "switch (Family)" dispatch with its case labels, the inner
// "switch (Model)" for family 10h, the *Type assignments for the later
// families, and the closing "}" of each if-block and of the function are all
// missing.  Surviving lines are kept byte-for-byte; comments only added.
static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                          unsigned Features, unsigned *Type,
  // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
  // appears to be no way to generate the wide variety of AMD-specific targets
  // from the information returned from CPUID.
  // --- Family 10h ("amdfam10") ---------------------------------------------
  *Type = AMDFAM10H; // "amdfam10"
  // NOTE(review): the model dispatch selecting among the three subtypes below
  // is missing in this copy.
  *Subtype = AMDFAM10H_BARCELONA;
  *Subtype = AMDFAM10H_SHANGHAI;
  *Subtype = AMDFAM10H_ISTANBUL;
  // --- Family 15h ("bdver" generations), selected by model range -----------
  if (Model >= 0x60 && Model <= 0x7f) {
    *Subtype = AMDFAM15H_BDVER4;
    break; // "bdver4"; 60h-7Fh: Excavator
  if (Model >= 0x30 && Model <= 0x3f) {
    *Subtype = AMDFAM15H_BDVER3;
    break; // "bdver3"; 30h-3Fh: Steamroller
  if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
    *Subtype = AMDFAM15H_BDVER2;
    break; // "bdver2"; 02h, 10h-1Fh: Piledriver
  *Subtype = AMDFAM15H_BDVER1;
  break; // "bdver1"; 00h-0Fh: Bulldozer
  // --- Family 17h ----------------------------------------------------------
  *Subtype = AMDFAM17H_ZNVER1;
// Compute the FEATURE_* bit sets from CPUID leaf-1 ECX/EDX plus leaf 7
// (sub-leaf 0) and extended leaf 0x80000001, writing bits 0-31 to
// *FeaturesOut and bits 32-63 to *Features2Out.
//
// NOTE(review): truncated in this copy — missing relative to what the
// surviving lines imply: the declarations of the scratch EAX/EBX locals
// (used below), the do/while wrapper and the "if (F < 32) ... else ..."
// selection inside setFeature, the "if ((EDX >> n) & 1)"-style guards for
// the first two groups of setFeature calls, the "if (HasAVX)" guard before
// setFeature(FEATURE_AVX), the "bool HasLeaf7 =" prefix on the leaf-7 probe,
// and the closing "}".  Surviving lines kept byte-for-byte; comments only.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *FeaturesOut,
                                 unsigned *Features2Out) {
  unsigned Features = 0;
  unsigned Features2 = 0;

  // Records feature F: bits < 32 go to Features, bits >= 32 to Features2.
  // NOTE(review): the branch selecting between the two lines is missing here.
#define setFeature(F) \
  Features |= 1U << (F & 0x1f); \
  Features2 |= 1U << ((F - 32) & 0x1f); \

  // CPUID(1).EDX-derived features (their guarding tests are missing here):
  setFeature(FEATURE_CMOV);
  setFeature(FEATURE_MMX);
  setFeature(FEATURE_SSE);
  setFeature(FEATURE_SSE2);
  // CPUID(1).ECX-derived features (their guarding tests are missing here):
  setFeature(FEATURE_SSE3);
  setFeature(FEATURE_PCLMUL);
  setFeature(FEATURE_SSSE3);
  setFeature(FEATURE_FMA);
  setFeature(FEATURE_SSE4_1);
  setFeature(FEATURE_SSE4_2);
  setFeature(FEATURE_POPCNT);
  setFeature(FEATURE_AES);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
  setFeature(FEATURE_AVX);

  // Probe leaf 7 sub-leaf 0 (structured extended feature flags); the
  // "bool HasLeaf7 =" prefix of this statement is missing in this copy.
  MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 9) & 1))
    setFeature(FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VL);

  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VPOPCNTDQ);

  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124FMAPS);

  // Extended leaves: query the maximum, then leaf 0x80000001 for AMD bits.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(FEATURE_FMA4);

  *FeaturesOut = Features;
  *Features2Out = Features2;
// Attribute placed on __cpu_indicator_init so it runs during startup, before
// ordinary (unprioritized) constructors.  HAVE_INIT_PRIORITY is expected to
// be supplied by the build system — NOTE(review): confirm; it is not defined
// in this file.
#if defined(HAVE_INIT_PRIORITY)
// Fixed: the priority must be parenthesized — "__constructor__ 101" is not
// valid GNU attribute syntax.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
// Global CPU model written once by __cpu_indicator_init and read by
// compiler-generated code.  NOTE(review): field order/layout is presumably
// ABI shared with libgcc's __cpu_model — confirm before changing anything.
struct __processor_model {
  unsigned int __cpu_vendor;   // enum ProcessorVendors; nonzero == initialized
  unsigned int __cpu_type;     // CPU type code (see *TypeAndSubtype helpers)
  unsigned int __cpu_subtype;  // CPU subtype code (enum ProcessorSubtypes)
  unsigned int __cpu_features[1]; // FEATURE_* bits 0-31
} __cpu_model = {0, 0, 0, {0}};
// FEATURE_* bits 32-63, kept outside the struct (filled from Features2 in
// __cpu_indicator_init).
unsigned int __cpu_features2;
/* A constructor function that sets __cpu_model and __cpu_features2 with
   the right values. This needs to run only once. This constructor is
   given the highest priority and it should run before constructors without
   the priority set. However, it still runs after ifunc initializers and
   needs to be called explicitly there. */
int CONSTRUCTOR_ATTRIBUTE
__cpu_indicator_init(void) {
  // NOTE(review): this function is truncated in this copy: the
  // "unsigned Vendor;" declaration (Vendor is used below), the early
  // "return" statements following the three guard conditions, the closing
  // "}" of the vendor-check block, the "} else" between the AMD assignment
  // and the VENDOR_OTHER fallback, and the function's tail (final return and
  // closing brace) are missing / fall past the visible end of the chunk.
  // Surviving lines are kept byte-for-byte; comments only added.
  unsigned EAX, EBX, ECX, EDX;
  unsigned MaxLeaf = 5;
  unsigned Model, Family, Brand_id;
  unsigned Features = 0;
  unsigned Features2 = 0;

  /* This function needs to run just once. */
  if (__cpu_model.__cpu_vendor)

  if (!isCpuIdSupported())

  /* Assume cpuid insn present. Run in level 0 to get vendor id. */
  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  // Leaf 1: signature (family/model) in EAX, brand index in EBX low byte,
  // feature flags in ECX/EDX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  detectX86FamilyModel(EAX, &Family, &Model);
  Brand_id = EBX & 0xff;

  /* Find available features. */
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
  __cpu_model.__cpu_features[0] = Features;
  __cpu_features2 = Features2;

  if (Vendor == SIG_INTEL) {
    getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
                                    &(__cpu_model.__cpu_type),
                                    &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_INTEL;
  } else if (Vendor == SIG_AMD) {
    getAMDProcessorTypeAndSubtype(Family, Model, Features,
                                  &(__cpu_model.__cpu_type),
                                  &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_AMD;
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  // Sanity-check the stored codes against the enum maxima.
  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);