1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is based on LLVM's lib/Support/Host.cpp.
11 // It implements the operating system Host concept and builtin
12 // __cpu_model for the compiler_rt library, for x86 only.
14 //===----------------------------------------------------------------------===//
// Entire implementation is compiled only for x86/x86-64 targets with a
// compiler that provides either GNU inline asm (GCC/Clang) or the MSVC
// cpuid intrinsics.
16 #if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
// Fallback so __has_attribute() can be used unconditionally below on
// compilers that do not predefine it.
30 #ifndef __has_attribute
31 #define __has_attribute(attr) 0
// NOTE(review): the #endif closing this #ifndef is not visible in this
// excerpt -- confirm it is present in the full file.
// CPUID leaf 0 reports the vendor string in EBX:EDX:ECX; the EBX word alone
// distinguishes Intel ("Genu"...) from AMD ("Auth"...).
34 enum VendorSignatures {
35 SIG_INTEL = 0x756e6547 /* Genu */,
36 SIG_AMD = 0x68747541 /* Auth */
// NOTE(review): the closing "};" of VendorSignatures is not visible in this
// excerpt.
// Values stored into __cpu_model.__cpu_vendor. The enumerators themselves
// (VENDOR_INTEL, VENDOR_AMD, VENDOR_OTHER, VENDOR_MAX are referenced below)
// are not visible in this excerpt; their numbering is ABI -- do not reorder.
39 enum ProcessorVendors {
// Values stored into __cpu_model.__cpu_subtype. Order is ABI and must not
// change; several enumerators between the ones shown are not visible in this
// excerpt (the gaps in the sequence below are truncation, not intent).
61 enum ProcessorSubtypes {
62 INTEL_COREI7_NEHALEM = 1,
63 INTEL_COREI7_WESTMERE,
64 INTEL_COREI7_SANDYBRIDGE,
73 INTEL_COREI7_IVYBRIDGE,
75 INTEL_COREI7_BROADWELL,
77 INTEL_COREI7_SKYLAKE_AVX512,
78 INTEL_COREI7_CANNONLAKE,
// Bit positions for __cpu_model.__cpu_features[0]; set as (1 << FEATURE_x)
// in getAvailableFeatures() below. Most enumerators (FEATURE_CMOV through
// FEATURE_AVX512VBMI etc., all referenced later) are not visible in this
// excerpt; order is ABI -- do not reorder.
82 enum ProcessorFeatures {
111 FEATURE_AVX5124VNNIW,
112 FEATURE_AVX5124FMAPS,
113 FEATURE_AVX512VPOPCNTDQ
116 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
117 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
118 // support. Consequently, for i386, the presence of CPUID is checked first
119 // via the corresponding eflags bit.
// Returns true when the CPUID instruction may be executed. On i386 this is
// detected by attempting to toggle the ID bit (0x00200000) in EFLAGS: if the
// toggle sticks, CPUID exists. On x86-64, and under MSVC, CPUID is assumed
// present (the non-i386 paths are not visible in this excerpt -- confirm).
120 static bool isCpuIdSupported() {
121 #if defined(__GNUC__) || defined(__clang__)
122 #if defined(__i386__)
123 int __cpuid_supported;
// NOTE(review): several lines of the asm sequence (the pushfl/popl pairs
// surrounding the EFLAGS toggle, the result-setting tail, and the clobber
// list) are not visible in this excerpt; the fragment below is incomplete
// as shown.
126 " movl %%eax,%%ecx\n"
127 " xorl $0x00200000,%%eax\n"
133 " cmpl %%eax,%%ecx\n"
137 : "=r"(__cpuid_supported)
140 if (!__cpuid_supported)
148 // This code is copied from lib/Support/Host.cpp.
149 // Changes to either file should be mirrored in the other.
151 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
152 /// the specified arguments. If we can't run cpuid on the host, return true.
153 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
154 unsigned *rECX, unsigned *rEDX) {
155 #if defined(__GNUC__) || defined(__clang__)
156 #if defined(__x86_64__)
157 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
158 // FIXME: should we save this for Clang?
// RBX is saved to RSI around the cpuid rather than listed as a clobber (see
// the comment above).
159 __asm__("movq\t%%rbx, %%rsi\n\t"
// NOTE(review): the "cpuid" instruction line between the save and the xchg
// is not visible in this excerpt.
161 "xchgq\t%%rbx, %%rsi\n\t"
162 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
// NOTE(review): the input operand line (": \"a\"(value)") and the trailing
// return statements are not visible in this excerpt.
165 #elif defined(__i386__)
166 __asm__("movl\t%%ebx, %%esi\n\t"
168 "xchgl\t%%ebx, %%esi\n\t"
169 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
175 #elif defined(_MSC_VER)
176 // The MSVC intrinsic is portable across x86 and x64.
// NOTE(review): the "int registers[4];" declaration used below is not
// visible in this excerpt.
178 __cpuid(registers, value);
179 *rEAX = registers[0];
180 *rEBX = registers[1];
181 *rECX = registers[2];
182 *rEDX = registers[3];
189 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
190 /// the 4 values in the specified arguments. If we can't run cpuid on the host,
// (continuation of the doc comment -- "return true." -- is not visible in
// this excerpt.) Same structure as getX86CpuIDAndInfo above, but also loads
// the subleaf number into ECX before executing cpuid.
192 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
193 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
// NOTE(review): the final parameter line ("unsigned *rEDX) {") is not
// visible in this excerpt.
195 #if defined(__GNUC__) || defined(__clang__)
196 #if defined(__x86_64__)
197 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
198 // FIXME: should we save this for Clang?
199 __asm__("movq\t%%rbx, %%rsi\n\t"
// NOTE(review): the "cpuid" instruction line is not visible in this excerpt.
201 "xchgq\t%%rbx, %%rsi\n\t"
202 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
203 : "a"(value), "c"(subleaf));
205 #elif defined(__i386__)
206 __asm__("movl\t%%ebx, %%esi\n\t"
208 "xchgl\t%%ebx, %%esi\n\t"
209 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
210 : "a"(value), "c"(subleaf));
215 #elif defined(_MSC_VER)
// NOTE(review): the "int registers[4];" declaration and the return
// statements are not visible in this excerpt.
217 __cpuidex(registers, value, subleaf);
218 *rEAX = registers[0];
219 *rEBX = registers[1];
220 *rECX = registers[2];
221 *rEDX = registers[3];
228 // Read control register 0 (XCR0). Used to detect features such as AVX.
// Presumably returns false on success / true on failure, mirroring
// getX86CpuIDAndInfo (callers negate the result) -- the return statements
// are not visible in this excerpt; confirm in the full file.
229 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
230 #if defined(__GNUC__) || defined(__clang__)
231 // Check xgetbv; this uses a .byte sequence instead of the instruction
232 // directly because older assemblers do not include support for xgetbv and
233 // there is no easy way to conditionally compile based on the assembler used.
// 0F 01 D0 encodes XGETBV; ECX=0 selects XCR0 (the XFEATURE_ENABLED_MASK).
234 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
236 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
237 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// NOTE(review): the assignment of the low 32 bits to *rEAX is not visible
// in this excerpt; only the high half is shown.
239 *rEDX = Result >> 32;
/// detectX86FamilyModel - Decode the display family and display model from
/// CPUID leaf 1 EAX. Per the Intel SDM (CPUID, leaf 01H EAX layout), the
/// extended model ID (bits 16-19) augments the base model for families 6 and
/// 0Fh, but the extended family ID (bits 20-27) is added ONLY when the base
/// family ID is 0Fh. The version shown in this excerpt added the extended
/// family unconditionally inside the family==6||family==0xF branch (the
/// inner family==0xF guard was dropped), which would misreport any family-6
/// CPU whose reserved extended-family bits were nonzero; the guard is
/// restored here, along with the truncated signature/closing brace.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
// Map an Intel family/model (plus brand id and detected features) to the
// __cpu_model type/subtype enumerators. The body is a switch over Model for
// family 6 parts.
// NOTE(review): this excerpt is incomplete -- the "static void" return-type
// line of the signature, the switch(Model) header, many case labels (e.g.
// the Haswell/Broadwell/Skylake-AVX512/Cannonlake/Silvermont/KNL/KNM model
// numbers), most "break;" statements, and the closing braces are not
// visible. Do not assume fall-through where a break is merely truncated.
260 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
261 unsigned Brand_id, unsigned Features,
262 unsigned *Type, unsigned *Subtype) {
268 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
269 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
270 // mobile processor, Intel Core 2 Extreme processor, Intel
271 // Pentium Dual-Core processor, Intel Xeon processor, model
272 // 0Fh. All processors are manufactured using the 65 nm process.
273 case 0x16: // Intel Celeron processor model 16h. All processors are
274 // manufactured using the 65 nm process
275 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
276 // 17h. All processors are manufactured using the 45 nm process.
278 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
279 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
280 // the 45 nm process.
281 *Type = INTEL_CORE2; // "penryn"
283 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
284 // processors are manufactured using the 45 nm process.
285 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
286 // As found in a Summer 2010 model iMac.
288 case 0x2e: // Nehalem EX
289 *Type = INTEL_COREI7; // "nehalem"
290 *Subtype = INTEL_COREI7_NEHALEM;
292 case 0x25: // Intel Core i7, laptop version.
293 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
294 // processors are manufactured using the 32 nm process.
295 case 0x2f: // Westmere EX
296 *Type = INTEL_COREI7; // "westmere"
297 *Subtype = INTEL_COREI7_WESTMERE;
299 case 0x2a: // Intel Core i7 processor. All processors are manufactured
300 // using the 32 nm process.
302 *Type = INTEL_COREI7; //"sandybridge"
303 *Subtype = INTEL_COREI7_SANDYBRIDGE;
306 case 0x3e: // Ivy Bridge EP
307 *Type = INTEL_COREI7; // "ivybridge"
308 *Subtype = INTEL_COREI7_IVYBRIDGE;
316 *Type = INTEL_COREI7; // "haswell"
317 *Subtype = INTEL_COREI7_HASWELL;
325 *Type = INTEL_COREI7; // "broadwell"
326 *Subtype = INTEL_COREI7_BROADWELL;
330 case 0x4e: // Skylake mobile
331 case 0x5e: // Skylake desktop
332 case 0x8e: // Kaby Lake mobile
333 case 0x9e: // Kaby Lake desktop
334 *Type = INTEL_COREI7; // "skylake"
335 *Subtype = INTEL_COREI7_SKYLAKE;
340 *Type = INTEL_COREI7;
341 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
346 *Type = INTEL_COREI7;
347 *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
350 case 0x1c: // Most 45 nm Intel Atom processors
351 case 0x26: // 45 nm Atom Lincroft
352 case 0x27: // 32 nm Atom Medfield
353 case 0x35: // 32 nm Atom Midview
354 case 0x36: // 32 nm Atom Midview
355 *Type = INTEL_BONNELL;
358 // Atom Silvermont codes from the Intel software optimization guide.
364 case 0x4c: // really airmont
365 *Type = INTEL_SILVERMONT;
366 break; // "silvermont"
369 *Type = INTEL_KNL; // knl
373 *Type = INTEL_KNM; // knm
376 default: // Unknown family 6 CPU.
// Map an AMD family/model (plus detected features) to the __cpu_model
// type/subtype enumerators.
// NOTE(review): this excerpt is incomplete -- the final parameter line
// ("unsigned *Subtype) {"), the switch(Family) header, the case labels
// (family 0x10, 0x15, 0x17, ...), the Model range checks for the family-10h
// subtypes, several *Type assignments (e.g. AMDFAM15H/AMDFAM17H), breaks,
// and closing braces are not visible.
385 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
386 unsigned Features, unsigned *Type,
388 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
389 // appears to be no way to generate the wide variety of AMD-specific targets
390 // from the information returned from CPUID.
393 *Type = AMDFAM10H; // "amdfam10"
396 *Subtype = AMDFAM10H_BARCELONA;
399 *Subtype = AMDFAM10H_SHANGHAI;
402 *Subtype = AMDFAM10H_ISTANBUL;
// Family 15h (Bulldozer line): subtypes are distinguished by Model ranges,
// checked from newest to oldest so the first match wins.
411 if (Model >= 0x60 && Model <= 0x7f) {
412 *Subtype = AMDFAM15H_BDVER4;
413 break; // "bdver4"; 60h-7Fh: Excavator
415 if (Model >= 0x30 && Model <= 0x3f) {
416 *Subtype = AMDFAM15H_BDVER3;
417 break; // "bdver3"; 30h-3Fh: Steamroller
419 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
420 *Subtype = AMDFAM15H_BDVER2;
421 break; // "bdver2"; 02h, 10h-1Fh: Piledriver
424 *Subtype = AMDFAM15H_BDVER1;
425 break; // "bdver1"; 00h-0Fh: Bulldozer
433 *Subtype = AMDFAM17H_ZNVER1;
// Translate CPUID feature bits (leaf 1 ECX/EDX, leaf 7 EBX/ECX/EDX, and
// extended leaf 0x80000001 ECX) into the FEATURE_* bit positions stored in
// __cpu_model.__cpu_features[0].
// NOTE(review): this excerpt is incomplete -- the local declarations of
// EAX/EBX and the individual "if ((EDX >> n) & 1)" / "if ((ECX >> n) & 1)"
// guard lines that pair with the leaf-1 Features |= statements below are
// not visible.
440 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
441 unsigned *FeaturesOut) {
442 unsigned Features = 0;
446 Features |= 1 << FEATURE_CMOV;
448 Features |= 1 << FEATURE_MMX;
450 Features |= 1 << FEATURE_SSE;
452 Features |= 1 << FEATURE_SSE2;
455 Features |= 1 << FEATURE_SSE3;
457 Features |= 1 << FEATURE_PCLMUL;
459 Features |= 1 << FEATURE_SSSE3;
461 Features |= 1 << FEATURE_FMA;
463 Features |= 1 << FEATURE_SSE4_1;
465 Features |= 1 << FEATURE_SSE4_2;
467 Features |= 1 << FEATURE_POPCNT;
469 Features |= 1 << FEATURE_AES;
471 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
472 // indicates that the AVX registers will be saved and restored on context
473 // switch, then we have full AVX support.
// Leaf 1 ECX bit 27 = OSXSAVE, bit 28 = AVX. XCR0 bits 1-2 (0x6) mean the
// OS saves SSE+AVX state; bits 5-7 (0xe0) additionally cover the AVX-512
// opmask/ZMM state.
474 const unsigned AVXBits = (1 << 27) | (1 << 28);
475 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
476 ((EAX & 0x6) == 0x6);
477 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
480 Features |= 1 << FEATURE_AVX;
// NOTE(review): the "bool HasLeaf7 =" beginning of this initializer is not
// visible in this excerpt.
483 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
485 if (HasLeaf7 && ((EBX >> 3) & 1))
486 Features |= 1 << FEATURE_BMI;
487 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
488 Features |= 1 << FEATURE_AVX2;
489 if (HasLeaf7 && ((EBX >> 9) & 1))
490 Features |= 1 << FEATURE_BMI2;
491 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
492 Features |= 1 << FEATURE_AVX512F;
493 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
494 Features |= 1 << FEATURE_AVX512DQ;
495 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
496 Features |= 1 << FEATURE_AVX512IFMA;
497 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
498 Features |= 1 << FEATURE_AVX512PF;
499 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
500 Features |= 1 << FEATURE_AVX512ER;
501 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
502 Features |= 1 << FEATURE_AVX512CD;
503 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
504 Features |= 1 << FEATURE_AVX512BW;
505 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
506 Features |= 1 << FEATURE_AVX512VL;
508 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
509 Features |= 1 << FEATURE_AVX512VBMI;
510 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
511 Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
513 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
514 Features |= 1 << FEATURE_AVX5124VNNIW;
515 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
516 Features |= 1 << FEATURE_AVX5124FMAPS;
518 unsigned MaxExtLevel;
// NOTE(review): the return value of this call is ignored; if cpuid were
// unavailable, MaxExtLevel could be read uninitialized below. In practice
// callers only reach this after isCpuIdSupported()/leaf-0 checks succeed --
// confirm that invariant holds in the full file.
519 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
521 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
522 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
523 if (HasExtLeaf1 && ((ECX >> 6) & 1))
524 Features |= 1 << FEATURE_SSE4_A;
525 if (HasExtLeaf1 && ((ECX >> 11) & 1))
526 Features |= 1 << FEATURE_XOP;
527 if (HasExtLeaf1 && ((ECX >> 16) & 1))
528 Features |= 1 << FEATURE_FMA4;
530 *FeaturesOut = Features;
// Select how the one-time initializer below is registered. With init-priority
// support it runs at priority 101, i.e. before any constructor that has no
// explicit priority (0-100 are reserved for the implementation).
//
// Fix: GCC/Clang attribute syntax requires the priority in parentheses --
// __attribute__((__constructor__(101))) -- the bare "__constructor__ 101"
// form shown in the excerpt does not parse. Also restores the #else/#endif
// so the conditional is balanced.
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
// Forward declaration so the constructor attribute is attached to the
// definition below.
543 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
// The __cpu_model record filled in by __cpu_indicator_init (see the file
// header: this implements the compiler's __cpu_model builtin support).
// Field order and widths are ABI -- presumably they must match the copy in
// GCC's libgcc; verify against gcc/config/i386/cpuinfo.h before changing.
545 struct __processor_model {
546 unsigned int __cpu_vendor;
547 unsigned int __cpu_type;
548 unsigned int __cpu_subtype;
549 unsigned int __cpu_features[1];
550 } __cpu_model = {0, 0, 0, {0}};
552 /* A constructor function that is sets __cpu_model and __cpu_features with
553 the right values. This needs to run only once. This constructor is
554 given the highest priority and it should run before constructors without
555 the priority set. However, it still runs after ifunc initializers and
556 needs to be called explicitly there. */
558 int CONSTRUCTOR_ATTRIBUTE
559 __cpu_indicator_init(void) {
560 unsigned EAX, EBX, ECX, EDX;
561 unsigned MaxLeaf = 5;
// NOTE(review): the declaration of `Vendor` (written by the leaf-0 cpuid
// call below) is not visible in this excerpt.
563 unsigned Model, Family, Brand_id;
564 unsigned Features = 0;
566 /* This function needs to run just once. */
567 if (__cpu_model.__cpu_vendor)
// NOTE(review): the early `return 0;` statements belonging to this guard
// and the next are not visible in this excerpt.
570 if (!isCpuIdSupported())
573 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
574 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
575 __cpu_model.__cpu_vendor = VENDOR_OTHER;
// NOTE(review): the `return -1;` and closing brace of this failure branch
// are not visible in this excerpt.
578 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
579 detectX86FamilyModel(EAX, &Family, &Model);
// Brand index is the low byte of EBX from leaf 1.
580 Brand_id = EBX & 0xff;
582 /* Find available features. */
583 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
584 __cpu_model.__cpu_features[0] = Features;
586 if (Vendor == SIG_INTEL) {
588 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
589 &(__cpu_model.__cpu_type),
590 &(__cpu_model.__cpu_subtype));
591 __cpu_model.__cpu_vendor = VENDOR_INTEL;
592 } else if (Vendor == SIG_AMD) {
594 getAMDProcessorTypeAndSubtype(Family, Model, Features,
595 &(__cpu_model.__cpu_type),
596 &(__cpu_model.__cpu_subtype));
597 __cpu_model.__cpu_vendor = VENDOR_AMD;
// NOTE(review): the `} else` line preceding this fallback assignment is not
// visible in this excerpt.
599 __cpu_model.__cpu_vendor = VENDOR_OTHER;
601 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
602 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
603 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
// NOTE(review): the trailing `return 0;`, the function's closing brace, and
// the #endif matching the architecture guard at the top of the file are not
// visible in this excerpt.