//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library, for x86 only.
//
//===----------------------------------------------------------------------===//
16 #if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
30 #ifndef __has_attribute
31 #define __has_attribute(attr) 0
// CPUID leaf-0 vendor-id signatures (value of EBX for each vendor string).
enum VendorSignatures {
  SIG_INTEL = 0x756e6547 /* Genu */,
  SIG_AMD = 0x68747541 /* Auth */
};

// NOTE(review): the numeric values of the three enums below form the ABI of
// __cpu_model consumed by __builtin_cpu_is/__builtin_cpu_supports; they must
// stay in sync with libgcc's config/i386/cpuinfo — do not reorder.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};

enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  CPU_TYPE_MAX
};

enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  CPU_SUBTYPE_MAX
};

// Bit positions inside __cpu_model.__cpu_features[0].
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true if the CPUID instruction may be executed safely.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  // On i386, CPUID exists iff bit 21 (ID) of EFLAGS can be toggled.
  int __cpuid_supported;

  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  cmpl   %%eax,%%ecx\n"
          "  movl   $0,%%eax\n"
          "  je     1f\n" // flags unchanged -> ID bit stuck -> no CPUID
          "  movl   $1,%%eax\n"
          "1:\n"
          "  movl   %%eax,%0\n"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
#endif
  // x86-64 always has CPUID; MSVC targets assume it as well.
  return true;
}
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
#elif defined(__i386__) || defined(_M_IX86)
#if defined(__GNUC__) || defined(__clang__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER)
  // 32-bit MSVC: no __cpuidex on old toolchains; use inline asm and store
  // each register through the out-parameters.
  __asm {
    mov eax,value
    mov ecx,subleaf
    cpuid
    mov esi,rEAX
    mov dword ptr [esi],eax
    mov esi,rEBX
    mov dword ptr [esi],ebx
    mov esi,rECX
    mov dword ptr [esi],ecx
    mov esi,rEDX
    mov dword ptr [esi],edx
  }
  return false;
#else
  return true;
#endif
#else
  return true;
#endif
}
// Read control register 0 (XCR0). Used to detect features such as AVX.
// Returns true on failure (XGETBV unavailable); only call when CPUID has
// reported OSXSAVE, otherwise the instruction faults.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
/// Decode the family and model fields from the CPUID leaf-1 EAX value,
/// folding in the extended family/model fields per the Intel SDM:
/// extended family is added when family == 0xF; extended model is
/// prepended when family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
277 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
278 unsigned Brand_id, unsigned Features,
279 unsigned *Type, unsigned *Subtype) {
285 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
286 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
287 // mobile processor, Intel Core 2 Extreme processor, Intel
288 // Pentium Dual-Core processor, Intel Xeon processor, model
289 // 0Fh. All processors are manufactured using the 65 nm process.
290 case 0x16: // Intel Celeron processor model 16h. All processors are
291 // manufactured using the 65 nm process
292 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
293 // 17h. All processors are manufactured using the 45 nm process.
295 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
296 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
297 // the 45 nm process.
298 *Type = INTEL_CORE2; // "penryn"
300 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
301 // processors are manufactured using the 45 nm process.
302 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
303 // As found in a Summer 2010 model iMac.
305 case 0x2e: // Nehalem EX
306 *Type = INTEL_COREI7; // "nehalem"
307 *Subtype = INTEL_COREI7_NEHALEM;
309 case 0x25: // Intel Core i7, laptop version.
310 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
311 // processors are manufactured using the 32 nm process.
312 case 0x2f: // Westmere EX
313 *Type = INTEL_COREI7; // "westmere"
314 *Subtype = INTEL_COREI7_WESTMERE;
316 case 0x2a: // Intel Core i7 processor. All processors are manufactured
317 // using the 32 nm process.
319 *Type = INTEL_COREI7; //"sandybridge"
320 *Subtype = INTEL_COREI7_SANDYBRIDGE;
323 case 0x3e: // Ivy Bridge EP
324 *Type = INTEL_COREI7; // "ivybridge"
325 *Subtype = INTEL_COREI7_IVYBRIDGE;
333 *Type = INTEL_COREI7; // "haswell"
334 *Subtype = INTEL_COREI7_HASWELL;
342 *Type = INTEL_COREI7; // "broadwell"
343 *Subtype = INTEL_COREI7_BROADWELL;
347 case 0x4e: // Skylake mobile
348 case 0x5e: // Skylake desktop
349 case 0x8e: // Kaby Lake mobile
350 case 0x9e: // Kaby Lake desktop
351 *Type = INTEL_COREI7; // "skylake"
352 *Subtype = INTEL_COREI7_SKYLAKE;
357 *Type = INTEL_COREI7;
358 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
361 case 0x1c: // Most 45 nm Intel Atom processors
362 case 0x26: // 45 nm Atom Lincroft
363 case 0x27: // 32 nm Atom Medfield
364 case 0x35: // 32 nm Atom Midview
365 case 0x36: // 32 nm Atom Midview
366 *Type = INTEL_BONNELL;
369 // Atom Silvermont codes from the Intel software optimization guide.
375 case 0x4c: // really airmont
376 *Type = INTEL_SILVERMONT;
377 break; // "silvermont"
380 *Type = INTEL_KNL; // knl
383 default: // Unknown family 6 CPU.
392 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
393 unsigned Features, unsigned *Type,
395 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
396 // appears to be no way to generate the wide variety of AMD-specific targets
397 // from the information returned from CPUID.
400 *Type = AMDFAM10H; // "amdfam10"
403 *Subtype = AMDFAM10H_BARCELONA;
406 *Subtype = AMDFAM10H_SHANGHAI;
409 *Subtype = AMDFAM10H_ISTANBUL;
418 if (Model >= 0x60 && Model <= 0x7f) {
419 *Subtype = AMDFAM15H_BDVER4;
420 break; // "bdver4"; 60h-7Fh: Excavator
422 if (Model >= 0x30 && Model <= 0x3f) {
423 *Subtype = AMDFAM15H_BDVER3;
424 break; // "bdver3"; 30h-3Fh: Steamroller
426 if (Model >= 0x10 && Model <= 0x1f) {
427 *Subtype = AMDFAM15H_BDVER2;
428 break; // "bdver2"; 10h-1Fh: Piledriver
431 *Subtype = AMDFAM15H_BDVER1;
432 break; // "bdver1"; 00h-0Fh: Bulldozer
440 *Subtype = AMDFAM17H_ZNVER1;
447 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
448 unsigned *FeaturesOut) {
449 unsigned Features = 0;
453 Features |= 1 << FEATURE_CMOV;
455 Features |= 1 << FEATURE_MMX;
457 Features |= 1 << FEATURE_SSE;
459 Features |= 1 << FEATURE_SSE2;
462 Features |= 1 << FEATURE_SSE3;
464 Features |= 1 << FEATURE_PCLMUL;
466 Features |= 1 << FEATURE_SSSE3;
468 Features |= 1 << FEATURE_FMA;
470 Features |= 1 << FEATURE_SSE4_1;
472 Features |= 1 << FEATURE_SSE4_2;
474 Features |= 1 << FEATURE_POPCNT;
476 Features |= 1 << FEATURE_AES;
478 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
479 // indicates that the AVX registers will be saved and restored on context
480 // switch, then we have full AVX support.
481 const unsigned AVXBits = (1 << 27) | (1 << 28);
482 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
483 ((EAX & 0x6) == 0x6);
484 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
487 Features |= 1 << FEATURE_AVX;
490 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
492 if (HasLeaf7 && ((EBX >> 3) & 1))
493 Features |= 1 << FEATURE_BMI;
494 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
495 Features |= 1 << FEATURE_AVX2;
496 if (HasLeaf7 && ((EBX >> 9) & 1))
497 Features |= 1 << FEATURE_BMI2;
498 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
499 Features |= 1 << FEATURE_AVX512F;
500 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
501 Features |= 1 << FEATURE_AVX512DQ;
502 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
503 Features |= 1 << FEATURE_AVX512IFMA;
504 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
505 Features |= 1 << FEATURE_AVX512PF;
506 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
507 Features |= 1 << FEATURE_AVX512ER;
508 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
509 Features |= 1 << FEATURE_AVX512CD;
510 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
511 Features |= 1 << FEATURE_AVX512BW;
512 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
513 Features |= 1 << FEATURE_AVX512VL;
515 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
516 Features |= 1 << FEATURE_AVX512VBMI;
517 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
518 Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
520 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
521 Features |= 1 << FEATURE_AVX5124VNNIW;
522 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
523 Features |= 1 << FEATURE_AVX5124FMAPS;
525 unsigned MaxExtLevel;
526 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
528 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
529 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
530 if (HasExtLeaf1 && ((ECX >> 6) & 1))
531 Features |= 1 << FEATURE_SSE4_A;
532 if (HasExtLeaf1 && ((ECX >> 11) & 1))
533 Features |= 1 << FEATURE_XOP;
534 if (HasExtLeaf1 && ((ECX >> 16) & 1))
535 Features |= 1 << FEATURE_FMA4;
537 *FeaturesOut = Features;
// Pick how __cpu_indicator_init gets run before user code: with an explicit
// init priority if the build provides one, else as a plain constructor, else
// not at all (MSVC).
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
// The global CPU description filled in by __cpu_indicator_init below.
// NOTE(review): field order and widths presumably must match the layout GCC's
// __builtin_cpu_is/__builtin_cpu_supports expect — verify against libgcc
// before changing anything here.
struct __processor_model {
  unsigned int __cpu_vendor;       // a ProcessorVendors value
  unsigned int __cpu_type;         // a ProcessorTypes value
  unsigned int __cpu_subtype;      // a ProcessorSubtypes value
  unsigned int __cpu_features[1];  // bit mask indexed by ProcessorFeatures
} __cpu_model = {0, 0, 0, {0}};
559 /* A constructor function that is sets __cpu_model and __cpu_features with
560 the right values. This needs to run only once. This constructor is
561 given the highest priority and it should run before constructors without
562 the priority set. However, it still runs after ifunc initializers and
563 needs to be called explicitly there. */
565 int CONSTRUCTOR_ATTRIBUTE
566 __cpu_indicator_init(void) {
567 unsigned EAX, EBX, ECX, EDX;
568 unsigned MaxLeaf = 5;
570 unsigned Model, Family, Brand_id;
571 unsigned Features = 0;
573 /* This function needs to run just once. */
574 if (__cpu_model.__cpu_vendor)
577 if (!isCpuIdSupported())
580 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
581 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
582 __cpu_model.__cpu_vendor = VENDOR_OTHER;
585 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
586 detectX86FamilyModel(EAX, &Family, &Model);
587 Brand_id = EBX & 0xff;
589 /* Find available features. */
590 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
591 __cpu_model.__cpu_features[0] = Features;
593 if (Vendor == SIG_INTEL) {
595 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
596 &(__cpu_model.__cpu_type),
597 &(__cpu_model.__cpu_subtype));
598 __cpu_model.__cpu_vendor = VENDOR_INTEL;
599 } else if (Vendor == SIG_AMD) {
601 getAMDProcessorTypeAndSubtype(Family, Model, Features,
602 &(__cpu_model.__cpu_type),
603 &(__cpu_model.__cpu_subtype));
604 __cpu_model.__cpu_vendor = VENDOR_AMD;
606 __cpu_model.__cpu_vendor = VENDOR_OTHER;
608 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
609 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
610 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);