1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is based on LLVM's lib/Support/Host.cpp.
11 // It implements the operating system Host concept and builtin
12 // __cpu_model for the compiler_rt library, for x86 only.
14 //===----------------------------------------------------------------------===//
16 #if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
30 #ifndef __has_attribute
31 #define __has_attribute(attr) 0
// CPUID leaf-0 vendor signatures: the value returned in EBX, i.e. the first
// four bytes of the vendor-id string ("GenuineIntel" / "AuthenticAMD") read
// as a little-endian dword.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547 /* Genu */,
  SIG_AMD = 0x68747541 /* Auth */

// Values stored in __cpu_model.__cpu_vendor.
// NOTE(review): the enumerators (VENDOR_INTEL, VENDOR_AMD, VENDOR_OTHER and
// VENDOR_MAX are referenced later in this file) are not visible in this
// chunk of the file.
enum ProcessorVendors {

// Values stored in __cpu_model.__cpu_subtype.
// NOTE(review): several enumerators between the ones shown appear to be
// elided from this chunk, so the implicit values here are incomplete;
// presumably they must stay numerically in sync with the compiler's
// __builtin_cpu_is() tables -- confirm against libgcc.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE_AVX512,

// Bit positions for __cpu_model.__cpu_features[0]; each FEATURE_* is used
// as a shift count ("1 << FEATURE_x") in getAvailableFeatures() below.
enum ProcessorFeatures {
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  // Classic CPUID-presence probe: try to flip the ID bit (0x00200000) in
  // EFLAGS and compare; on CPUs with CPUID the bit is writable.
  // NOTE(review): the opening/closing of the inline-asm statement and some
  // of its instructions are not visible in this chunk of the file.
  " movl %%eax,%%ecx\n"
  " xorl $0x00200000,%%eax\n"
  " cmpl %%eax,%%ecx\n"
  : "=r"(__cpuid_supported)
  if (!__cpuid_supported)
146 // This code is copied from lib/Support/Host.cpp.
147 // Changes to either file should be mirrored in the other.
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // NOTE(review): the "cpuid" instruction line, the input-operand list, the
  // return statements and the closing #endif's are not visible in this
  // chunk of the file.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
#elif defined(__i386__)
  // 32-bit variant of the same ebx save/restore around cpuid.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  // NOTE(review): the declaration of `registers` (an int[4] for __cpuid) is
  // not visible in this chunk.
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
/// NOTE(review): the tail of this doc comment, the "unsigned *rEDX"
/// parameter line, the "cpuid" instruction itself, the return statements and
/// the closing #endif's are not visible in this chunk of the file.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(__i386__)
  // 32-bit variant of the same ebx save/restore around cpuid.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
  // NOTE(review): the declaration of `registers` (an int[4] for __cpuidex)
  // is not visible in this chunk.
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
// Read control register 0 (XCR0). Used to detect features such as AVX.
// Returns the low dword in *rEAX and the high dword in *rEDX.
// NOTE(review): the "*rEAX = Result;" assignment for the MSVC branch, the
// return statements, the #else fallback and the closing #endif/brace are not
// visible in this chunk of the file.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEDX = Result >> 32; // high dword of XCR0
// Decode the family and model fields from CPUID leaf 1's EAX, folding in the
// extended family/model bits where the base fields saturate.
// NOTE(review): the "unsigned *Model) {" parameter line and the function's
// closing brace are not visible in this chunk of the file.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    // NOTE(review): the comment below says the extended-family bits apply
    // only when the family ID is F, which implies an inner
    // "if (*Family == 0xf)" guard that is not visible in this chunk; as
    // shown, the add would also fire for family 6 -- confirm against the
    // full file.
    // Examine extended family ID if family ID is F.
    *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
// Map an Intel CPU's family/model (from CPUID leaf 1) to the type/subtype
// enumerators stored in __cpu_model.
// NOTE(review): the "static void" return-type line, the "switch (Model)"
// dispatch, many case labels, the "break" statements and the closing braces
// are not visible in this chunk of the file; the fragments below are the
// visible case bodies only.
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                unsigned Brand_id, unsigned Features,
                                unsigned *Type, unsigned *Subtype) {
  case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
             // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
             // mobile processor, Intel Core 2 Extreme processor, Intel
             // Pentium Dual-Core processor, Intel Xeon processor, model
             // 0Fh. All processors are manufactured using the 65 nm process.
  case 0x16: // Intel Celeron processor model 16h. All processors are
             // manufactured using the 65 nm process
  case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
             // 17h. All processors are manufactured using the 45 nm process.
             // 45nm: Penryn , Wolfdale, Yorkfield (XE)
  case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
             // the 45 nm process.
    *Type = INTEL_CORE2; // "penryn"
  case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
             // processors are manufactured using the 45 nm process.
  case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
             // As found in a Summer 2010 model iMac.
  case 0x2e: // Nehalem EX
    *Type = INTEL_COREI7; // "nehalem"
    *Subtype = INTEL_COREI7_NEHALEM;
  case 0x25: // Intel Core i7, laptop version.
  case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
             // processors are manufactured using the 32 nm process.
  case 0x2f: // Westmere EX
    *Type = INTEL_COREI7; // "westmere"
    *Subtype = INTEL_COREI7_WESTMERE;
  case 0x2a: // Intel Core i7 processor. All processors are manufactured
             // using the 32 nm process.
    *Type = INTEL_COREI7; //"sandybridge"
    *Subtype = INTEL_COREI7_SANDYBRIDGE;
  case 0x3e: // Ivy Bridge EP
    *Type = INTEL_COREI7; // "ivybridge"
    *Subtype = INTEL_COREI7_IVYBRIDGE;
    // NOTE(review): the case labels for the Haswell and Broadwell models are
    // not visible in this chunk.
    *Type = INTEL_COREI7; // "haswell"
    *Subtype = INTEL_COREI7_HASWELL;
    *Type = INTEL_COREI7; // "broadwell"
    *Subtype = INTEL_COREI7_BROADWELL;
  case 0x4e: // Skylake mobile
  case 0x5e: // Skylake desktop
  case 0x8e: // Kaby Lake mobile
  case 0x9e: // Kaby Lake desktop
    *Type = INTEL_COREI7; // "skylake"
    *Subtype = INTEL_COREI7_SKYLAKE;
    // NOTE(review): the Skylake-server case label is not visible in this
    // chunk.
    *Type = INTEL_COREI7;
    *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
  case 0x1c: // Most 45 nm Intel Atom processors
  case 0x26: // 45 nm Atom Lincroft
  case 0x27: // 32 nm Atom Medfield
  case 0x35: // 32 nm Atom Midview
  case 0x36: // 32 nm Atom Midview
    *Type = INTEL_BONNELL;
    // Atom Silvermont codes from the Intel software optimization guide.
  case 0x4c: // really airmont
    *Type = INTEL_SILVERMONT;
    break; // "silvermont"
    // NOTE(review): the Knights Landing case label is not visible in this
    // chunk.
    *Type = INTEL_KNL; // knl
  default: // Unknown family 6 CPU.
// Map an AMD CPU's family/model to the type/subtype enumerators stored in
// __cpu_model.
// NOTE(review): the "unsigned *Subtype) {" parameter line, the switch on
// Family, its case labels and several break/closing-brace lines are not
// visible in this chunk of the file; the fragments below are the visible
// case bodies only.
static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                          unsigned Features, unsigned *Type,
  // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
  // appears to be no way to generate the wide variety of AMD-specific targets
  // from the information returned from CPUID.
    *Type = AMDFAM10H; // "amdfam10"
    *Subtype = AMDFAM10H_BARCELONA;
    *Subtype = AMDFAM10H_SHANGHAI;
    *Subtype = AMDFAM10H_ISTANBUL;
    // Family 15h: Bulldozer line; subtype chosen by model range.
    if (Model >= 0x60 && Model <= 0x7f) {
      *Subtype = AMDFAM15H_BDVER4;
      break; // "bdver4"; 60h-7Fh: Excavator
    if (Model >= 0x30 && Model <= 0x3f) {
      *Subtype = AMDFAM15H_BDVER3;
      break; // "bdver3"; 30h-3Fh: Steamroller
    if (Model >= 0x10 && Model <= 0x1f) {
      *Subtype = AMDFAM15H_BDVER2;
      break; // "bdver2"; 10h-1Fh: Piledriver
    *Subtype = AMDFAM15H_BDVER1;
    break; // "bdver1"; 00h-0Fh: Bulldozer
    *Subtype = AMDFAM17H_ZNVER1;
// Build the FEATURE_* bitmask from CPUID leaf 1 (ECX/EDX as passed in),
// CPUID leaf 7 and extended leaf 0x80000001, and store it in *FeaturesOut.
// NOTE(review): the declarations of the local EAX/EBX scratch registers, the
// per-feature "if ((reg >> bit) & 1)" guards for the leaf-1 features, the
// "bool HasLeaf7 =" declaration and the closing brace are not visible in
// this chunk of the file.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *FeaturesOut) {
  unsigned Features = 0;
  Features |= 1 << FEATURE_CMOV;
  Features |= 1 << FEATURE_MMX;
  Features |= 1 << FEATURE_SSE;
  Features |= 1 << FEATURE_SSE2;
  Features |= 1 << FEATURE_SSE3;
  Features |= 1 << FEATURE_PCLMUL;
  Features |= 1 << FEATURE_SSSE3;
  Features |= 1 << FEATURE_FMA;
  Features |= 1 << FEATURE_SSE4_1;
  Features |= 1 << FEATURE_SSE4_2;
  Features |= 1 << FEATURE_POPCNT;
  Features |= 1 << FEATURE_AES;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
  // AVX-512 additionally needs the opmask/zmm-hi state bits in XCR0.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
  Features |= 1 << FEATURE_AVX;

  MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf-7 EBX feature bits (each AVX-512 flag also requires OS save
  // support via HasAVX512Save).
  if (HasLeaf7 && ((EBX >> 3) & 1))
    Features |= 1 << FEATURE_BMI;
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    Features |= 1 << FEATURE_AVX2;
  if (HasLeaf7 && ((EBX >> 9) & 1))
    Features |= 1 << FEATURE_BMI2;
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512F;
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512DQ;
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512IFMA;
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512PF;
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512ER;
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512CD;
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512BW;
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512VL;
  // Leaf-7 ECX feature bits.
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512VBMI;
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
  // Leaf-7 EDX feature bits.
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX5124VNNIW;
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    Features |= 1 << FEATURE_AVX5124FMAPS;

  // Query the highest extended leaf, then read AMD-style feature bits from
  // extended leaf 0x80000001 if it exists.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    Features |= 1 << FEATURE_SSE4_A;
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    Features |= 1 << FEATURE_XOP;
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    Features |= 1 << FEATURE_FMA4;

  *FeaturesOut = Features;
// Constructor attribute for __cpu_indicator_init so it runs at program
// startup, before ordinary (priority-less) constructors.  Prefer an explicit
// high init priority when the build reports support for it, fall back to a
// plain constructor, and otherwise (e.g. MSVC) to nothing.
#if defined(HAVE_INIT_PRIORITY)
// Fixed from "__attribute__((__constructor__ 101))": GNU attribute syntax
// requires the priority argument in parentheses, so the old spelling would
// fail to compile whenever HAVE_INIT_PRIORITY is actually defined.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
// Global filled in by __cpu_indicator_init() below and read by the
// compiler-generated __builtin_cpu_is / __builtin_cpu_supports code.
// NOTE(review): the field order and names are presumably part of the
// __cpu_model ABI shared with libgcc -- do not reorder or rename; confirm
// against the compiler's definition.
struct __processor_model {
  unsigned int __cpu_vendor;      // a ProcessorVendors value (VENDOR_*)
  unsigned int __cpu_type;        // processor type (e.g. INTEL_COREI7)
  unsigned int __cpu_subtype;     // a ProcessorSubtypes value
  unsigned int __cpu_features[1]; // bitmask of (1 << FEATURE_*)
} __cpu_model = {0, 0, 0, {0}};
/* A constructor function that sets __cpu_model and __cpu_features with
541 the right values. This needs to run only once. This constructor is
542 given the highest priority and it should run before constructors without
543 the priority set. However, it still runs after ifunc initializers and
544 needs to be called explicitly there. */
int CONSTRUCTOR_ATTRIBUTE
__cpu_indicator_init(void) {
  unsigned EAX, EBX, ECX, EDX;
  unsigned MaxLeaf = 5;
  unsigned Model, Family, Brand_id;
  unsigned Features = 0;
  // NOTE(review): the declaration of "Vendor" (used below), the early
  // "return 0;" statements after the guard checks, and the function's
  // closing "return 0; }" are not visible in this chunk of the file.

  /* This function needs to run just once. */
  if (__cpu_model.__cpu_vendor)

  // Bail out (leaving __cpu_model zeroed) on i386 CPUs without CPUID.
  if (!isCpuIdSupported())

  /* Assume cpuid insn present. Run in level 0 to get vendor id. */
  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  // Leaf 1: family/model in EAX, brand index in EBX bits 0-7, feature
  // flags in ECX/EDX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  detectX86FamilyModel(EAX, &Family, &Model);
  Brand_id = EBX & 0xff;

  /* Find available features. */
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
  __cpu_model.__cpu_features[0] = Features;

  if (Vendor == SIG_INTEL) {
    /* Fill in type/subtype for Intel parts. */
    getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
                                    &(__cpu_model.__cpu_type),
                                    &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_INTEL;
  } else if (Vendor == SIG_AMD) {
    /* Fill in type/subtype for AMD parts. */
    getAMDProcessorTypeAndSubtype(Family, Model, Features,
                                  &(__cpu_model.__cpu_type),
                                  &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_AMD;
    // NOTE(review): the "} else" line before the fallback assignment is not
    // visible in this chunk.
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  // Sanity checks; assert presumably comes from <assert.h> included in an
  // elided part of the file -- confirm.
  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);