1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is based on LLVM's lib/Support/Host.cpp.
11 // It implements the operating system Host concept and builtin
12 // __cpu_model for the compiler_rt library, for x86 only.
14 //===----------------------------------------------------------------------===//
16 #if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
// CPUID leaf-0 vendor signatures: the EBX word of CPUID(0) spells the first
// four bytes of the vendor string ("Genu"ineIntel / "Auth"enticAMD); compared
// against the Vendor word read in __cpu_indicator_init below.
// NOTE(review): this extraction is garbled — every line carries a stale
// line-number prefix, and the closing "};" plus most enumerators of each of
// the four enums below are missing. Restore them from upstream
// compiler-rt/lib/builtins/cpu_model.c; the enumerator values presumably must
// not be renumbered, since the compiler's __builtin_cpu_is()/
// __builtin_cpu_supports() lowering encodes them — verify against libgcc.
30 enum VendorSignatures {
31 SIG_INTEL = 0x756e6547 /* Genu */,
32 SIG_AMD = 0x68747541 /* Auth */
35 enum ProcessorVendors {
69 enum ProcessorSubtypes {
70 INTEL_COREI7_NEHALEM = 1,
71 INTEL_COREI7_WESTMERE,
72 INTEL_COREI7_SANDYBRIDGE,
81 INTEL_COREI7_IVYBRIDGE,
83 INTEL_COREI7_BROADWELL,
85 INTEL_COREI7_SKYLAKE_AVX512,
87 INTEL_ATOM_SILVERMONT,
88 INTEL_KNIGHTS_LANDING,
107 enum ProcessorFeatures {
126 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
127 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
128 // support. Consequently, for i386, the presence of CPUID is checked first
129 // via the corresponding eflags bit.
// Returns true when the CPUID instruction can be executed. On x86-64 CPUID
// always exists; on i386 it is probed by toggling EFLAGS bit 21 (0x00200000)
// and checking whether the change sticks.
// NOTE(review): the inline-asm body is incomplete in this extraction — the
// pushfl/popfl sequence, the result-compare tail, the clobber list, the
// early "return false" and the closing braces are missing, and every line
// carries a stale line-number prefix. Restore from upstream cpu_model.c
// before compiling.
130 static bool isCpuIdSupported() {
131 #if defined(__GNUC__) || defined(__clang__)
132 #if defined(__i386__)
133 int __cpuid_supported;
136 " movl %%eax,%%ecx\n"
137 " xorl $0x00200000,%%eax\n"
143 " cmpl %%eax,%%ecx\n"
147 : "=r"(__cpuid_supported)
150 if (!__cpuid_supported)
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid leaf (\p value) and store
/// the resulting EAX/EBX/ECX/EDX in the four output arguments. The caller
/// must have verified CPUID support (see isCpuIdSupported); on a non-x86 or
/// unknown-compiler build this asserts.
static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually,
  // since rbx may hold the PIC base register.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
// pedantic #else returns to appease -Wunreachable-code (so we don't generate
// postprocessed code that looks like "return true; return false;")
#else
  assert(0 && "This method is defined only for x86.");
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
#else
  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
#endif
}
/// getX86CpuIDAndInfoEx - Execute the specified cpuid leaf (\p value) with
/// the given \p subleaf in ECX and store the resulting EAX/EBX/ECX/EDX in the
/// four output arguments. Needed for leaves (e.g. 7) whose output depends on
/// the subleaf index. Asserts on non-x86 or unknown-compiler builds.
static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
#else
  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
#endif
#elif defined(__i386__) || defined(_M_IX86)
#if defined(__GNUC__) || defined(__clang__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
  // 32-bit MSVC has no __cpuidex prior to VS2008 SP1; use inline asm and
  // store each register through the output pointers.
  __asm {
      mov   eax,value
      mov   ecx,subleaf
      cpuid
      mov   esi,rEAX
      mov   dword ptr [esi],eax
      mov   esi,rEBX
      mov   dword ptr [esi],ebx
      mov   esi,rECX
      mov   dword ptr [esi],ecx
      mov   esi,rEDX
      mov   dword ptr [esi],edx
  }
#else
  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
#endif
#else
  assert(0 && "This method is defined only for x86.");
#endif
}
// Read extended control register 0 (XCR0). Used to detect whether the OS
// saves/restores the AVX and AVX-512 register state on context switch.
// Returns false on success (with EAX/EDX filled in), true if XGETBV is not
// available with this compiler. Caller must have checked CPUID.1:ECX
// OSXSAVE (bit 27) first — executing XGETBV without it raises #UD.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
/// detectX86FamilyModel - Decode the display Family and Model from the
/// CPUID(1).EAX version-information word, per the Intel SDM algorithm:
/// the 8-bit extended family is added only when the base family is 0xF,
/// and the 4-bit extended model is prepended when the family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
// Maps an Intel (Family, Model, Brand_id, Features) tuple to the Type and
// Subtype values stored into __cpu_model. Known family-6 models are matched
// explicitly by model number; an unknown family-6 model falls through to the
// "default:" arm, which guesses the newest matching microarchitecture from
// the FEATURE_* bits, strongest feature first (AVX-512 -> ADX -> AVX2 ->
// AVX -> SSE4.2 -> ...). MOVBE distinguishes Atom cores from big cores at
// the SSE4.2/SSSE3 levels.
// NOTE(review): this extraction is badly garbled — every line carries a
// stale line-number prefix, and the switch statements, many case bodies,
// "break"s and closing braces have been dropped. Do not attempt to compile
// as-is; restore the full function from upstream
// compiler-rt/lib/builtins/cpu_model.c.
282 static void getIntelProcessorTypeAndSubtype(unsigned int Family,
284 unsigned int Brand_id,
285 unsigned int Features,
286 unsigned *Type, unsigned *Subtype) {
295 case 0: // Intel486 DX processors
296 case 1: // Intel486 DX processors
297 case 2: // Intel486 SX processors
298 case 3: // Intel487 processors, IntelDX2 OverDrive processors,
299 // IntelDX2 processors
300 case 4: // Intel486 SL processor
301 case 5: // IntelSX2 processors
302 case 7: // Write-Back Enhanced IntelDX2 processors
303 case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
310 case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
311 // Pentium processors (60, 66)
312 case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
313 // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
315 case 3: // Pentium OverDrive processors for Intel486 processor-based
317 *Type = INTEL_PENTIUM;
319 case 4: // Pentium OverDrive processor with MMX technology for Pentium
320 // processor (75, 90, 100, 120, 133), Pentium processor with
321 // MMX technology (166, 200)
322 *Type = INTEL_PENTIUM;
323 *Subtype = INTEL_PENTIUM_MMX;
326 *Type = INTEL_PENTIUM;
331 case 0x01: // Pentium Pro processor
332 *Type = INTEL_PENTIUM_PRO;
334 case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
336 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
337 // model 05, and Intel Celeron processor, model 05
338 case 0x06: // Celeron processor, model 06
339 *Type = INTEL_PENTIUM_II;
341 case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
342 // processor, model 07
343 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
344 // model 08, and Celeron processor, model 08
345 case 0x0a: // Pentium III Xeon processor, model 0Ah
346 case 0x0b: // Pentium III processor, model 0Bh
347 *Type = INTEL_PENTIUM_III;
349 case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
350 case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
351 // 0Dh. All processors are manufactured using the 90 nm process.
352 case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
353 // Integrated Processor with Intel QuickAssist Technology
354 *Type = INTEL_PENTIUM_M;
356 case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
357 // 0Eh. All processors are manufactured using the 65 nm process.
358 *Type = INTEL_CORE_DUO;
360 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
361 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
362 // mobile processor, Intel Core 2 Extreme processor, Intel
363 // Pentium Dual-Core processor, Intel Xeon processor, model
364 // 0Fh. All processors are manufactured using the 65 nm process.
365 case 0x16: // Intel Celeron processor model 16h. All processors are
366 // manufactured using the 65 nm process
367 *Type = INTEL_CORE2; // "core2"
368 *Subtype = INTEL_CORE2_65;
370 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
371 // 17h. All processors are manufactured using the 45 nm process.
373 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
374 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
375 // the 45 nm process.
376 *Type = INTEL_CORE2; // "penryn"
377 *Subtype = INTEL_CORE2_45;
379 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
380 // processors are manufactured using the 45 nm process.
381 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
382 // As found in a Summer 2010 model iMac.
384 case 0x2e: // Nehalem EX
385 *Type = INTEL_COREI7; // "nehalem"
386 *Subtype = INTEL_COREI7_NEHALEM;
388 case 0x25: // Intel Core i7, laptop version.
389 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
390 // processors are manufactured using the 32 nm process.
391 case 0x2f: // Westmere EX
392 *Type = INTEL_COREI7; // "westmere"
393 *Subtype = INTEL_COREI7_WESTMERE;
395 case 0x2a: // Intel Core i7 processor. All processors are manufactured
396 // using the 32 nm process.
398 *Type = INTEL_COREI7; //"sandybridge"
399 *Subtype = INTEL_COREI7_SANDYBRIDGE;
402 case 0x3e: // Ivy Bridge EP
403 *Type = INTEL_COREI7; // "ivybridge"
404 *Subtype = INTEL_COREI7_IVYBRIDGE;
412 *Type = INTEL_COREI7; // "haswell"
413 *Subtype = INTEL_COREI7_HASWELL;
421 *Type = INTEL_COREI7; // "broadwell"
422 *Subtype = INTEL_COREI7_BROADWELL;
427 *Type = INTEL_COREI7; // "skylake-avx512"
428 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
431 *Type = INTEL_COREI7; // "skylake"
432 *Subtype = INTEL_COREI7_SKYLAKE;
435 case 0x1c: // Most 45 nm Intel Atom processors
436 case 0x26: // 45 nm Atom Lincroft
437 case 0x27: // 32 nm Atom Medfield
438 case 0x35: // 32 nm Atom Midview
439 case 0x36: // 32 nm Atom Midview
441 *Subtype = INTEL_ATOM_BONNELL;
444 // Atom Silvermont codes from the Intel software optimization guide.
450 case 0x4c: // really airmont
452 *Subtype = INTEL_ATOM_SILVERMONT;
453 break; // "silvermont"
456 *Type = INTEL_XEONPHI; // knl
457 *Subtype = INTEL_KNIGHTS_LANDING;
460 default: // Unknown family 6 CPU, try to guess.
461 if (Features & (1 << FEATURE_AVX512)) {
462 *Type = INTEL_XEONPHI; // knl
463 *Subtype = INTEL_KNIGHTS_LANDING;
466 if (Features & (1 << FEATURE_ADX)) {
467 *Type = INTEL_COREI7;
468 *Subtype = INTEL_COREI7_BROADWELL;
471 if (Features & (1 << FEATURE_AVX2)) {
472 *Type = INTEL_COREI7;
473 *Subtype = INTEL_COREI7_HASWELL;
476 if (Features & (1 << FEATURE_AVX)) {
477 *Type = INTEL_COREI7;
478 *Subtype = INTEL_COREI7_SANDYBRIDGE;
481 if (Features & (1 << FEATURE_SSE4_2)) {
482 if (Features & (1 << FEATURE_MOVBE)) {
484 *Subtype = INTEL_ATOM_SILVERMONT;
486 *Type = INTEL_COREI7;
487 *Subtype = INTEL_COREI7_NEHALEM;
491 if (Features & (1 << FEATURE_SSE4_1)) {
492 *Type = INTEL_CORE2; // "penryn"
493 *Subtype = INTEL_CORE2_45;
496 if (Features & (1 << FEATURE_SSSE3)) {
497 if (Features & (1 << FEATURE_MOVBE)) {
499 *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
501 *Type = INTEL_CORE2; // "core2"
502 *Subtype = INTEL_CORE2_65;
506 if (Features & (1 << FEATURE_EM64T)) {
507 *Type = INTEL_X86_64;
510 if (Features & (1 << FEATURE_SSE2)) {
511 *Type = INTEL_PENTIUM_M;
514 if (Features & (1 << FEATURE_SSE)) {
515 *Type = INTEL_PENTIUM_III;
518 if (Features & (1 << FEATURE_MMX)) {
519 *Type = INTEL_PENTIUM_II;
522 *Type = INTEL_PENTIUM_PRO;
527 case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
528 // model 00h and manufactured using the 0.18 micron process.
529 case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
530 // processor MP, and Intel Celeron processor. All processors are
531 // model 01h and manufactured using the 0.18 micron process.
532 case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
533 // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
534 // processor, and Mobile Intel Celeron processor. All processors
535 // are model 02h and manufactured using the 0.13 micron process.
537 ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
540 case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
541 // processor. All processors are model 03h and manufactured using
542 // the 90 nm process.
543 case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
544 // Pentium D processor, Intel Xeon processor, Intel Xeon
545 // processor MP, Intel Celeron D processor. All processors are
546 // model 04h and manufactured using the 90 nm process.
547 case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
548 // Extreme Edition, Intel Xeon processor, Intel Xeon processor
549 // MP, Intel Celeron D processor. All processors are model 06h
550 // and manufactured using the 65 nm process.
552 ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
557 ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
// Maps an AMD (Family, Model, Features) tuple to the Type and Subtype values
// stored into __cpu_model: K6 variants, Athlon/K8 variants, family 10h
// (Barcelona/Shanghai/Istanbul), Bobcat (btver1), Bulldozer family 15h
// (bdver1-4 selected by model range), and Jaguar (btver2). The AVX checks
// downgrade to btver1 when AVX is absent so a usable subtype is still set.
// NOTE(review): this extraction is badly garbled — every line carries a
// stale line-number prefix, and the switch head, most case labels, *Type
// assignments, "break"s and closing braces have been dropped. Restore the
// full function from upstream compiler-rt/lib/builtins/cpu_model.c.
566 static void getAMDProcessorTypeAndSubtype(unsigned int Family,
568 unsigned int Features, unsigned *Type,
570 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
571 // appears to be no way to generate the wide variety of AMD-specific targets
572 // from the information returned from CPUID.
581 *Subtype = AMDPENTIUM_K6;
584 *Subtype = AMDPENTIUM_K62;
588 *Subtype = AMDPENTIUM_K63;
591 *Subtype = AMDPENTIUM_GEODE;
600 *Subtype = AMDATHLON_TBIRD;
601 break; // "athlon-tbird"
605 *Subtype = AMDATHLON_MP;
606 break; // "athlon-mp"
608 *Subtype = AMDATHLON_XP;
609 break; // "athlon-xp"
615 if (Features & (1 << FEATURE_SSE3)) {
616 *Subtype = AMDATHLON_K8SSE3;
621 *Subtype = AMDATHLON_OPTERON;
624 *Subtype = AMDATHLON_FX;
625 break; // "athlon-fx"; also opteron
627 *Subtype = AMDATHLON_64;
631 *Type = AMDFAM10H; // "amdfam10"
634 *Subtype = AMDFAM10H_BARCELONA;
637 *Subtype = AMDFAM10H_SHANGHAI;
640 *Subtype = AMDFAM10H_ISTANBUL;
647 *Subtype = AMD_BTVER1;
652 (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
653 *Subtype = AMD_BTVER1;
656 if (Model >= 0x50 && Model <= 0x6f) {
657 *Subtype = AMDFAM15H_BDVER4;
658 break; // "bdver4"; 50h-6Fh: Excavator
660 if (Model >= 0x30 && Model <= 0x3f) {
661 *Subtype = AMDFAM15H_BDVER3;
662 break; // "bdver3"; 30h-3Fh: Steamroller
664 if (Model >= 0x10 && Model <= 0x1f) {
665 *Subtype = AMDFAM15H_BDVER2;
666 break; // "bdver2"; 10h-1Fh: Piledriver
669 *Subtype = AMDFAM15H_BDVER1;
670 break; // "bdver1"; 00h-0Fh: Bulldozer
676 (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
677 *Subtype = AMD_BTVER1;
680 *Subtype = AMD_BTVER2;
687 static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
689 unsigned Features = 0;
690 unsigned int EAX, EBX;
691 Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
692 Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
693 Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
694 Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
695 Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
696 Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
697 Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
698 Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
700 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
701 // indicates that the AVX registers will be saved and restored on context
702 // switch, then we have full AVX support.
703 const unsigned AVXBits = (1 << 27) | (1 << 28);
704 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
705 ((EAX & 0x6) == 0x6);
706 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
707 bool HasLeaf7 = MaxLeaf >= 0x7;
708 getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
709 bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
710 bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
711 bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
712 Features |= (HasAVX << FEATURE_AVX);
713 Features |= (HasAVX2 << FEATURE_AVX2);
714 Features |= (HasAVX512 << FEATURE_AVX512);
715 Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
716 Features |= (HasADX << FEATURE_ADX);
718 getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
719 Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
// Run __cpu_indicator_init as early as possible: priority 101 places it
// before ordinary (unprioritized) constructors that may already query
// __cpu_model. When the toolchain lacks init-priority support, fall back
// to a plain constructor attribute.
#ifdef HAVE_INIT_PRIORITY
#define CONSTRUCTOR_PRIORITY (101)
#else
#define CONSTRUCTOR_PRIORITY
#endif

int __cpu_indicator_init(void)
    __attribute__((constructor CONSTRUCTOR_PRIORITY));
// The model record filled in by __cpu_indicator_init and read by the
// compiler's __builtin_cpu_is()/__builtin_cpu_supports() lowering; the
// field layout must therefore stay exactly as-is. Zero-initialized so
// __cpu_vendor != 0 doubles as the "already initialized" flag.
struct __processor_model {
  unsigned int __cpu_vendor;      // enum ProcessorVendors value (0 = unset)
  unsigned int __cpu_type;        // CPU type (e.g. INTEL_COREI7)
  unsigned int __cpu_subtype;     // CPU subtype (e.g. INTEL_COREI7_HASWELL)
  unsigned int __cpu_features[1]; // FEATURE_* bit mask
} __cpu_model = {0, 0, 0, {0}};
/* A constructor function that sets __cpu_model and __cpu_features with
   the right values. This needs to run only once. This constructor is
   given the highest priority and it should run before constructors without
   the priority set. However, it still runs after ifunc initializers and
   needs to be called explicitly there. */
// One-shot constructor: queries CPUID and fills in the global __cpu_model
// (vendor, type, subtype, feature mask). Returns early if __cpu_vendor is
// already non-zero or CPUID is unavailable; dispatches to the Intel or AMD
// decoder based on the CPUID(0) vendor signature, otherwise VENDOR_OTHER.
// NOTE(review): this extraction is garbled — stale line-number prefixes on
// every line, and several statements (the Vendor declaration, the early
// "return"s, the MaxLaf < 1 guard around VENDOR_OTHER, the final
// "return 0;" and closing brace) are missing. Restore from upstream
// compiler-rt/lib/builtins/cpu_model.c before compiling.
745 int __attribute__((constructor CONSTRUCTOR_PRIORITY))
746 __cpu_indicator_init(void) {
747 unsigned int EAX, EBX, ECX, EDX;
748 unsigned int MaxLeaf = 5;
750 unsigned int Model, Family, Brand_id;
751 unsigned int Features = 0;
753 /* This function needs to run just once. */
754 if (__cpu_model.__cpu_vendor)
757 if (!isCpuIdSupported())
760 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
761 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX);
764 __cpu_model.__cpu_vendor = VENDOR_OTHER;
767 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
768 detectX86FamilyModel(EAX, &Family, &Model);
769 Brand_id = EBX & 0xff;
771 /* Find available features. */
772 Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
773 __cpu_model.__cpu_features[0] = Features;
775 if (Vendor == SIG_INTEL) {
777 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
778 &(__cpu_model.__cpu_type),
779 &(__cpu_model.__cpu_subtype));
780 __cpu_model.__cpu_vendor = VENDOR_INTEL;
781 } else if (Vendor == SIG_AMD) {
783 getAMDProcessorTypeAndSubtype(Family, Model, Features,
784 &(__cpu_model.__cpu_type),
785 &(__cpu_model.__cpu_subtype));
786 __cpu_model.__cpu_vendor = VENDOR_AMD;
788 __cpu_model.__cpu_vendor = VENDOR_OTHER;
790 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
791 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
792 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);