1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is based on LLVM's lib/Support/Host.cpp.
11 // It implements the operating system Host concept and builtin
12 // __cpu_model for the compiler_rt library, for x86 only.
14 //===----------------------------------------------------------------------===//
// Whole-file guard: this translation unit is only compiled for x86 /
// x86-64 under GCC, Clang, or MSVC; on other targets it is empty.
// NOTE(review): lines are elided in this excerpt (the embedded original
// numbering jumps), including the #endif that matches the #ifndef below.
16 #if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
// Fallback so __has_attribute can be used unconditionally on compilers
// that do not provide the builtin (it then always answers "no").
30 #ifndef __has_attribute
31 #define __has_attribute(attr) 0
// CPUID leaf-0 vendor signatures: the raw EBX value CPUID returns, i.e.
// the first four bytes of "GenuineIntel" / "AuthenticAMD" as
// little-endian ASCII (compared against `Vendor` in __cpu_indicator_init).
// NOTE(review): the enum's closing "};" is elided in this excerpt.
34 enum VendorSignatures {
35 SIG_INTEL = 0x756e6547 /* Genu */,
36 SIG_AMD = 0x68747541 /* Auth */
// Values stored into __cpu_model.__cpu_vendor (VENDOR_INTEL, VENDOR_AMD,
// VENDOR_OTHER and VENDOR_MAX are referenced later in this file).
// NOTE(review): the enumerator list and closing "};" are elided in this
// excerpt.
39 enum ProcessorVendors {
// Values stored into __cpu_model.__cpu_subtype.  Presumably these must
// stay numerically in sync with the compiler's __builtin_cpu_is tables
// (this file implements __cpu_model for compiler-rt) -- verify against
// the compiler side before renumbering.
// NOTE(review): many enumerators and the closing "};" are elided in this
// excerpt (the embedded numbering jumps), so the implicit values implied
// by the visible lines do NOT reflect the real ordering.
73 enum ProcessorSubtypes {
74 INTEL_COREI7_NEHALEM = 1,
75 INTEL_COREI7_WESTMERE,
76 INTEL_COREI7_SANDYBRIDGE,
85 INTEL_COREI7_IVYBRIDGE,
87 INTEL_COREI7_BROADWELL,
89 INTEL_COREI7_SKYLAKE_AVX512,
91 INTEL_ATOM_SILVERMONT,
92 INTEL_KNIGHTS_LANDING,
// Bit positions for the Features bitmask built by getAvailableFeatures()
// (FEATURE_MMX, FEATURE_SSE, FEATURE_AVX, FEATURE_EM64T, ... are tested
// with `Features & (1 << FEATURE_x)` below).
// NOTE(review): the enumerator list and closing "};" are elided in this
// excerpt.
111 enum ProcessorFeatures {
130 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
131 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
132 // support. Consequently, for i386, the presence of CPUID is checked first
133 // via the corresponding eflags bit.
// Returns true iff the CPUID instruction may be executed.  On i386 this
// is probed by toggling the EFLAGS.ID bit (mask 0x00200000) and checking
// whether the change sticks; 64-bit x86 always has CPUID.
// NOTE(review): several lines of this function are elided in this
// excerpt (the asm statement's opening/closing, the return paths, and
// the matching #else/#endif arms).
134 static bool isCpuIdSupported() {
135 #if defined(__GNUC__) || defined(__clang__)
136 #if defined(__i386__)
137 int __cpuid_supported;
// Save a copy of the flags value, flip the ID bit, and compare below.
140 " movl %%eax,%%ecx\n"
141 " xorl $0x00200000,%%eax\n"
147 " cmpl %%eax,%%ecx\n"
151 : "=r"(__cpuid_supported)
// If the ID bit could not be toggled, CPUID is unavailable.
154 if (!__cpuid_supported)
162 // This code is copied from lib/Support/Host.cpp.
163 // Changes to either file should be mirrored in the other.
165 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
166 /// the specified arguments. If we can't run cpuid on the host, return true.
// Execute CPUID with leaf `value` and store EAX/EBX/ECX/EDX through the
// four out-pointers.  EBX/RBX is manually saved and restored around the
// cpuid because the compiler assumes it is preserved (see inline comment).
// NOTE(review): the cpuid instruction line, the `registers` array
// declaration, the input-operand lines, and the #else/#endif lines are
// elided in this excerpt.
167 static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
168 unsigned *rECX, unsigned *rEDX) {
169 #if defined(__GNUC__) || defined(__clang__)
170 #if defined(__x86_64__)
171 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
172 __asm__("movq\t%%rbx, %%rsi\n\t"
174 "xchgq\t%%rbx, %%rsi\n\t"
175 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
177 #elif defined(__i386__)
178 __asm__("movl\t%%ebx, %%esi\n\t"
180 "xchgl\t%%ebx, %%esi\n\t"
181 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
183 // pedantic #else returns to appease -Wunreachable-code (so we don't generate
184 // postprocessed code that looks like "return true; return false;")
186 assert(0 && "This method is defined only for x86.");
188 #elif defined(_MSC_VER)
189 // The MSVC intrinsic is portable across x86 and x64.
191 __cpuid(registers, value);
192 *rEAX = registers[0];
193 *rEBX = registers[1];
194 *rECX = registers[2];
195 *rEDX = registers[3];
197 assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
201 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
202 /// the 4 values in the specified arguments. If we can't run cpuid on the host,
// Execute CPUID with leaf `value` and subleaf `subleaf` (loaded into ECX)
// and store the four result registers through the out-pointers.  Same
// EBX/RBX save-restore dance as getX86CpuIDAndInfo.
// NOTE(review): this excerpt elides the rEDX parameter line, the cpuid
// instruction lines, the `registers` declarations, the MSVC/x86 inline
// asm scaffolding (including the pointer-advance instructions between the
// stores below), and the #else/#endif lines.
204 static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
205 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
207 #if defined(__x86_64__) || defined(_M_X64)
208 #if defined(__GNUC__) || defined(__clang__)
209 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
210 // FIXME: should we save this for Clang?
211 __asm__("movq\t%%rbx, %%rsi\n\t"
213 "xchgq\t%%rbx, %%rsi\n\t"
214 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
215 : "a"(value), "c"(subleaf));
216 #elif defined(_MSC_VER)
218 __cpuidex(registers, value, subleaf);
219 *rEAX = registers[0];
220 *rEBX = registers[1];
221 *rECX = registers[2];
222 *rEDX = registers[3];
224 assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
226 #elif defined(__i386__) || defined(_M_IX86)
227 #if defined(__GNUC__) || defined(__clang__)
228 __asm__("movl\t%%ebx, %%esi\n\t"
230 "xchgl\t%%ebx, %%esi\n\t"
231 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
232 : "a"(value), "c"(subleaf));
233 #elif defined(_MSC_VER)
// MSVC/x86 inline-asm path: stores each result register through esi.
// NOTE(review): the instructions that load esi and advance it between
// the four stores are elided here -- as written the stores all target
// the same address; confirm against the full source.
239 mov dword ptr [esi],eax
241 mov dword ptr [esi],ebx
243 mov dword ptr [esi],ecx
245 mov dword ptr [esi],edx
248 assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
251 assert(0 && "This method is defined only for x86.");
255 // Read control register 0 (XCR0). Used to detect features such as AVX.
// Execute XGETBV with ECX=0 to read XCR0 into EDX:EAX.  Callers treat a
// `false` return as success (see the `!getX86XCR0(...)` use in
// getAvailableFeatures).
// NOTE(review): the return statements, the *rEAX assignment on the MSVC
// path, and the #else/#endif lines are elided in this excerpt.
256 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
257 #if defined(__GNUC__) || defined(__clang__)
258 // Check xgetbv; this uses a .byte sequence instead of the instruction
259 // directly because older assemblers do not include support for xgetbv and
260 // there is no easy way to conditionally compile based on the assembler used.
261 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
263 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
264 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// High half of XCR0 goes to *rEDX (low half presumably to *rEAX on the
// elided line above this one -- confirm against the full source).
266 *rEDX = Result >> 32;
// Decode CPUID leaf-1 EAX into the effective family and model, folding in
// the extended family/model fields for family 6 and 0xF parts.
// NOTE(review): the second parameter line (unsigned *Model) and some
// interior lines are elided in this excerpt; in particular the extended
// *family* adjustment is normally guarded by `*Family == 0xf` only (per
// the comment on the next line) -- confirm against the full source.
273 static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
275 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
276 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7
277 if (*Family == 6 || *Family == 0xf) {
279 // Examine extended family ID if family ID is F.
280 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
281 // Examine extended model ID if family ID is 6 or F.
282 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
// Map an Intel CPU's (Family, Model, Brand_id, Features) to the
// *Type/*Subtype enum values stored in __cpu_model.  Known models are
// matched explicitly; unknown family-6 parts fall through to a
// feature-based guess ordered from newest to oldest ISA extension.
// NOTE(review): this excerpt elides the Model parameter line, the outer
// `switch (Family)` and inner `switch (Model)` scaffolding, and many
// `break`/`return`/closing-brace lines; only the visible lines are
// documented here.
286 static void getIntelProcessorTypeAndSubtype(unsigned int Family,
288 unsigned int Brand_id,
289 unsigned int Features,
290 unsigned *Type, unsigned *Subtype) {
// Family 4: i486-class parts (case labels of an elided switch on Model).
299 case 0: // Intel486 DX processors
300 case 1: // Intel486 DX processors
301 case 2: // Intel486 SX processors
302 case 3: // Intel487 processors, IntelDX2 OverDrive processors,
303 // IntelDX2 processors
304 case 4: // Intel486 SL processor
305 case 5: // IntelSX2 processors
306 case 7: // Write-Back Enhanced IntelDX2 processors
307 case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
// Family 5: Pentium-class parts.
314 case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
315 // Pentium processors (60, 66)
316 case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
317 // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
319 case 3: // Pentium OverDrive processors for Intel486 processor-based
321 *Type = INTEL_PENTIUM;
323 case 4: // Pentium OverDrive processor with MMX technology for Pentium
324 // processor (75, 90, 100, 120, 133), Pentium processor with
325 // MMX technology (166, 200)
326 *Type = INTEL_PENTIUM;
327 *Subtype = INTEL_PENTIUM_MMX;
330 *Type = INTEL_PENTIUM;
// Family 6: P6 through modern Core/Atom/Xeon Phi, keyed on Model.
335 case 0x01: // Pentium Pro processor
336 *Type = INTEL_PENTIUM_PRO;
338 case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
340 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
341 // model 05, and Intel Celeron processor, model 05
342 case 0x06: // Celeron processor, model 06
343 *Type = INTEL_PENTIUM_II;
345 case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
346 // processor, model 07
347 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
348 // model 08, and Celeron processor, model 08
349 case 0x0a: // Pentium III Xeon processor, model 0Ah
350 case 0x0b: // Pentium III processor, model 0Bh
351 *Type = INTEL_PENTIUM_III;
353 case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
354 case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
355 // 0Dh. All processors are manufactured using the 90 nm process.
356 case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
357 // Integrated Processor with Intel QuickAssist Technology
358 *Type = INTEL_PENTIUM_M;
360 case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
361 // 0Eh. All processors are manufactured using the 65 nm process.
362 *Type = INTEL_CORE_DUO;
364 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
365 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
366 // mobile processor, Intel Core 2 Extreme processor, Intel
367 // Pentium Dual-Core processor, Intel Xeon processor, model
368 // 0Fh. All processors are manufactured using the 65 nm process.
369 case 0x16: // Intel Celeron processor model 16h. All processors are
370 // manufactured using the 65 nm process
371 *Type = INTEL_CORE2; // "core2"
372 *Subtype = INTEL_CORE2_65;
374 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
375 // 17h. All processors are manufactured using the 45 nm process.
377 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
378 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
379 // the 45 nm process.
380 *Type = INTEL_CORE2; // "penryn"
381 *Subtype = INTEL_CORE2_45;
383 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
384 // processors are manufactured using the 45 nm process.
385 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
386 // As found in a Summer 2010 model iMac.
388 case 0x2e: // Nehalem EX
389 *Type = INTEL_COREI7; // "nehalem"
390 *Subtype = INTEL_COREI7_NEHALEM;
392 case 0x25: // Intel Core i7, laptop version.
393 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
394 // processors are manufactured using the 32 nm process.
395 case 0x2f: // Westmere EX
396 *Type = INTEL_COREI7; // "westmere"
397 *Subtype = INTEL_COREI7_WESTMERE;
399 case 0x2a: // Intel Core i7 processor. All processors are manufactured
400 // using the 32 nm process.
402 *Type = INTEL_COREI7; //"sandybridge"
403 *Subtype = INTEL_COREI7_SANDYBRIDGE;
406 case 0x3e: // Ivy Bridge EP
407 *Type = INTEL_COREI7; // "ivybridge"
408 *Subtype = INTEL_COREI7_IVYBRIDGE;
// NOTE(review): the Haswell/Broadwell/Skylake case labels are elided in
// this excerpt; only the assignments remain visible.
416 *Type = INTEL_COREI7; // "haswell"
417 *Subtype = INTEL_COREI7_HASWELL;
425 *Type = INTEL_COREI7; // "broadwell"
426 *Subtype = INTEL_COREI7_BROADWELL;
431 *Type = INTEL_COREI7; // "skylake-avx512"
432 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
435 *Type = INTEL_COREI7; // "skylake"
436 *Subtype = INTEL_COREI7_SKYLAKE;
439 case 0x1c: // Most 45 nm Intel Atom processors
440 case 0x26: // 45 nm Atom Lincroft
441 case 0x27: // 32 nm Atom Medfield
442 case 0x35: // 32 nm Atom Midview
443 case 0x36: // 32 nm Atom Midview
445 *Subtype = INTEL_ATOM_BONNELL;
448 // Atom Silvermont codes from the Intel software optimization guide.
454 case 0x4c: // really airmont
456 *Subtype = INTEL_ATOM_SILVERMONT;
457 break; // "silvermont"
460 *Type = INTEL_XEONPHI; // knl
461 *Subtype = INTEL_KNIGHTS_LANDING;
// Unknown family-6 model: guess by best available ISA feature, newest
// first (each branch presumably returns/breaks on an elided line).
464 default: // Unknown family 6 CPU, try to guess.
465 if (Features & (1 << FEATURE_AVX512)) {
466 *Type = INTEL_XEONPHI; // knl
467 *Subtype = INTEL_KNIGHTS_LANDING;
470 if (Features & (1 << FEATURE_ADX)) {
471 *Type = INTEL_COREI7;
472 *Subtype = INTEL_COREI7_BROADWELL;
475 if (Features & (1 << FEATURE_AVX2)) {
476 *Type = INTEL_COREI7;
477 *Subtype = INTEL_COREI7_HASWELL;
480 if (Features & (1 << FEATURE_AVX)) {
481 *Type = INTEL_COREI7;
482 *Subtype = INTEL_COREI7_SANDYBRIDGE;
485 if (Features & (1 << FEATURE_SSE4_2)) {
// MOVBE distinguishes Atom (Silvermont) from big-core (Nehalem) here.
486 if (Features & (1 << FEATURE_MOVBE)) {
488 *Subtype = INTEL_ATOM_SILVERMONT;
490 *Type = INTEL_COREI7;
491 *Subtype = INTEL_COREI7_NEHALEM;
495 if (Features & (1 << FEATURE_SSE4_1)) {
496 *Type = INTEL_CORE2; // "penryn"
497 *Subtype = INTEL_CORE2_45;
500 if (Features & (1 << FEATURE_SSSE3)) {
501 if (Features & (1 << FEATURE_MOVBE)) {
503 *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
505 *Type = INTEL_CORE2; // "core2"
506 *Subtype = INTEL_CORE2_65;
510 if (Features & (1 << FEATURE_EM64T)) {
511 *Type = INTEL_X86_64;
514 if (Features & (1 << FEATURE_SSE2)) {
515 *Type = INTEL_PENTIUM_M;
518 if (Features & (1 << FEATURE_SSE)) {
519 *Type = INTEL_PENTIUM_III;
522 if (Features & (1 << FEATURE_MMX)) {
523 *Type = INTEL_PENTIUM_II;
526 *Type = INTEL_PENTIUM_PRO;
// Family 15: Pentium 4 / NetBurst era, keyed on Model; 64-bit capable
// parts (EM64T) map to the 64-bit type names.
531 case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
532 // model 00h and manufactured using the 0.18 micron process.
533 case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
534 // processor MP, and Intel Celeron processor. All processors are
535 // model 01h and manufactured using the 0.18 micron process.
536 case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
537 // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
538 // processor, and Mobile Intel Celeron processor. All processors
539 // are model 02h and manufactured using the 0.13 micron process.
541 ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
544 case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
545 // processor. All processors are model 03h and manufactured using
546 // the 90 nm process.
547 case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
548 // Pentium D processor, Intel Xeon processor, Intel Xeon
549 // processor MP, Intel Celeron D processor. All processors are
550 // model 04h and manufactured using the 90 nm process.
551 case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
552 // Extreme Edition, Intel Xeon processor, Intel Xeon processor
553 // MP, Intel Celeron D processor. All processors are model 06h
554 // and manufactured using the 65 nm process.
556 ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
561 ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
// Map an AMD CPU's (Family, Model, Features) to the *Type/*Subtype enum
// values stored in __cpu_model, covering K6 through family 16h (Jaguar).
// NOTE(review): this excerpt elides the Model parameter line, the
// `switch (Family)` / `switch (Model)` scaffolding, most *Type
// assignments, and many `break` lines; only the visible lines are
// documented here.
570 static void getAMDProcessorTypeAndSubtype(unsigned int Family,
572 unsigned int Features, unsigned *Type,
574 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
575 // appears to be no way to generate the wide variety of AMD-specific targets
576 // from the information returned from CPUID.
// Family 5: K6-class parts (subtypes keyed on Model in elided labels).
585 *Subtype = AMDPENTIUM_K6;
588 *Subtype = AMDPENTIUM_K62;
592 *Subtype = AMDPENTIUM_K63;
595 *Subtype = AMDPENTIUM_GEODE;
// Family 6: Athlon-class parts.
604 *Subtype = AMDATHLON_TBIRD;
605 break; // "athlon-tbird"
609 *Subtype = AMDATHLON_MP;
610 break; // "athlon-mp"
612 *Subtype = AMDATHLON_XP;
613 break; // "athlon-xp"
// Family 15 (K8): SSE3 distinguishes later K8 revisions.
619 if (Features & (1 << FEATURE_SSE3)) {
620 *Subtype = AMDATHLON_K8SSE3;
625 *Subtype = AMDATHLON_OPTERON;
628 *Subtype = AMDATHLON_FX;
629 break; // "athlon-fx"; also opteron
631 *Subtype = AMDATHLON_64;
// Family 10h: Barcelona / Shanghai / Istanbul, keyed on elided Model.
635 *Type = AMDFAM10H; // "amdfam10"
638 *Subtype = AMDFAM10H_BARCELONA;
641 *Subtype = AMDFAM10H_SHANGHAI;
644 *Subtype = AMDFAM10H_ISTANBUL;
// Family 14h: Bobcat ("btver1").
651 *Subtype = AMD_BTVER1;
// Family 15h: Bulldozer line; without AVX, fall back to btver1.
656 (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
657 *Subtype = AMD_BTVER1;
660 if (Model >= 0x50 && Model <= 0x6f) {
661 *Subtype = AMDFAM15H_BDVER4;
662 break; // "bdver4"; 50h-6Fh: Excavator
664 if (Model >= 0x30 && Model <= 0x3f) {
665 *Subtype = AMDFAM15H_BDVER3;
666 break; // "bdver3"; 30h-3Fh: Steamroller
668 if (Model >= 0x10 && Model <= 0x1f) {
669 *Subtype = AMDFAM15H_BDVER2;
670 break; // "bdver2"; 10h-1Fh: Piledriver
673 *Subtype = AMDFAM15H_BDVER1;
674 break; // "bdver1"; 00h-0Fh: Bulldozer
// Family 16h: Jaguar ("btver2"); without AVX, fall back to btver1.
680 (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
681 *Subtype = AMD_BTVER1;
684 *Subtype = AMD_BTVER2;
// Build the FEATURE_* bitmask from CPUID leaf-1 ECX/EDX (passed in by the
// caller), CPUID leaf 7, XCR0, and extended leaf 0x80000001.
// NOTE(review): the MaxLeaf parameter line and the final `return
// Features;` are elided in this excerpt.
691 static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
693 unsigned Features = 0;
694 unsigned int EAX, EBX;
// Baseline SIMD/string features straight from leaf-1 register bits.
695 Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
696 Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
697 Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
698 Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
699 Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
700 Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
701 Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
702 Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
704 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
705 // indicates that the AVX registers will be saved and restored on context
706 // switch, then we have full AVX support.
707 const unsigned AVXBits = (1 << 27) | (1 << 28);
// getX86XCR0 returning false means XCR0 was read successfully; bits 1-2
// of XCR0 (0x6) = SSE+AVX state enabled; bits 5-7 (0xe0) = AVX-512 state.
708 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
709 ((EAX & 0x6) == 0x6);
710 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
711 bool HasLeaf7 = MaxLeaf >= 0x7;
// NOTE(review): leaf 7 is queried even when HasLeaf7 is false; results
// are only consumed under HasLeaf7 guards below, but EAX..EDX are
// clobbered regardless -- confirm this matches the full source.
712 getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
713 bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
714 bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
715 bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
716 Features |= (HasAVX << FEATURE_AVX);
717 Features |= (HasAVX2 << FEATURE_AVX2);
718 Features |= (HasAVX512 << FEATURE_AVX512);
719 Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
720 Features |= (HasADX << FEATURE_ADX);
// Long mode (EM64T/x86-64) is reported in extended leaf 0x80000001 EDX bit 29.
722 getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
723 Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
// Select how __cpu_indicator_init is registered as a static constructor:
//  - with init-priority support, run it early (priority 101) so it runs
//    before default-priority constructors (see comment above the
//    function definition below);
//  - otherwise use a plain constructor attribute when available;
//  - otherwise (e.g. MSVC) expand to nothing (see FIXME below).
// NOTE(review): the #else introducing the final branch is elided in
// this excerpt.
727 #if defined(HAVE_INIT_PRIORITY)
// Fixed: the priority argument must be parenthesized; the GCC attribute
// syntax is __constructor__(priority), and the previous
// `__attribute__((__constructor__ 101))` form does not compile.
728 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
729 #elif __has_attribute(__constructor__)
730 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
732 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
733 // this runs during initialization.
734 #define CONSTRUCTOR_ATTRIBUTE
// Forward declaration so the constructor attribute is attached to the
// definition below.
737 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
// The data __builtin_cpu_is/__builtin_cpu_supports read.  Presumably the
// layout must match the compiler/libgcc definition of __cpu_model
// exactly (this file implements the builtin for compiler-rt) -- do not
// reorder fields without checking the compiler side.
739 struct __processor_model {
740 unsigned int __cpu_vendor;
741 unsigned int __cpu_type;
742 unsigned int __cpu_subtype;
743 unsigned int __cpu_features[1];
744 } __cpu_model = {0, 0, 0, {0}};
746 /* A constructor function that is sets __cpu_model and __cpu_features with
747 the right values. This needs to run only once. This constructor is
748 given the highest priority and it should run before constructors without
749 the priority set. However, it still runs after ifunc initializers and
750 needs to be called explicitly there. */
// Populate __cpu_model from CPUID; idempotent (a non-zero __cpu_vendor
// marks it done).  See the comment block above for constructor-priority
// semantics.
// NOTE(review): this excerpt elides several lines (the `Vendor`
// declaration, the early `return` statements, some closing braces, and
// everything after the final assert -- the function's end is past the
// visible excerpt).
752 int CONSTRUCTOR_ATTRIBUTE
753 __cpu_indicator_init(void) {
754 unsigned int EAX, EBX, ECX, EDX;
// MaxLeaf is overwritten by the leaf-0 query below; 5 is just a
// conservative initial value.
755 unsigned int MaxLeaf = 5;
757 unsigned int Model, Family, Brand_id;
758 unsigned int Features = 0;
760 /* This function needs to run just once. */
761 if (__cpu_model.__cpu_vendor)
764 if (!isCpuIdSupported())
767 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
768 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX)
771 __cpu_model.__cpu_vendor = VENDOR_OTHER;
// Leaf 1: family/model in EAX, brand index in the low byte of EBX,
// feature bits in ECX/EDX.
774 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
775 detectX86FamilyModel(EAX, &Family, &Model);
776 Brand_id = EBX & 0xff;
778 /* Find available features. */
779 Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
780 __cpu_model.__cpu_features[0] = Features;
// Dispatch on the leaf-0 vendor signature to fill in type/subtype.
782 if (Vendor == SIG_INTEL) {
784 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
785 &(__cpu_model.__cpu_type),
786 &(__cpu_model.__cpu_subtype));
787 __cpu_model.__cpu_vendor = VENDOR_INTEL;
788 } else if (Vendor == SIG_AMD) {
790 getAMDProcessorTypeAndSubtype(Family, Model, Features,
791 &(__cpu_model.__cpu_type),
792 &(__cpu_model.__cpu_subtype));
793 __cpu_model.__cpu_vendor = VENDOR_AMD;
795 __cpu_model.__cpu_vendor = VENDOR_OTHER;
// Sanity-check that everything written stays inside the ABI-defined
// enum ranges.
797 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
798 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
799 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);