1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is based on LLVM's lib/Support/Host.cpp.
10 // It implements the operating system Host concept and builtin
11 // __cpu_model for the compiler_rt library, for x86 only.
13 //===----------------------------------------------------------------------===//
15 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
17 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
29 #ifndef __has_attribute
30 #define __has_attribute(attr) 0
// Vendor signatures compared against the EBX value returned by CPUID leaf 0
// (__cpu_indicator_init tests Vendor == SIG_INTEL / SIG_AMD). Each constant is
// four ASCII characters packed least-significant byte first; the trailing
// comments spell them out.
33 enum VendorSignatures {
34 SIG_INTEL = 0x756e6547, // Genu
35 SIG_AMD = 0x68747541, // Auth
// Vendor codes. NOTE(review): the enumerators are not visible here.
// __cpu_indicator_init stores VENDOR_INTEL, VENDOR_AMD or VENDOR_OTHER into
// __cpu_model.__cpu_vendor and asserts the result is below VENDOR_MAX;
// presumably those names are declared in this enum - confirm.
38 enum ProcessorVendors {
// CPU subtype codes. The vendor-specific detection routines store one of
// these through their *Subtype out-parameter, which __cpu_indicator_init
// points at __cpu_model.__cpu_subtype. The first enumerator is 1.
// NOTE(review): the numeric values are presumably shared with compiler-
// generated checks, so new entries should not renumber existing ones - confirm.
63 enum ProcessorSubtypes {
64 INTEL_COREI7_NEHALEM = 1,
65 INTEL_COREI7_WESTMERE,
66 INTEL_COREI7_SANDYBRIDGE,
75 INTEL_COREI7_IVYBRIDGE,
77 INTEL_COREI7_BROADWELL,
79 INTEL_COREI7_SKYLAKE_AVX512,
80 INTEL_COREI7_CANNONLAKE,
81 INTEL_COREI7_ICELAKE_CLIENT,
82 INTEL_COREI7_ICELAKE_SERVER,
84 INTEL_COREI7_CASCADELAKE,
// Feature indices used with setFeature() in getAvailableFeatures: an index F
// is recorded as bit (F & 0x1f) of Features or as bit ((F - 32) & 0x1f) of
// Features2.
88 enum ProcessorFeatures {
117 FEATURE_AVX5124VNNIW,
118 FEATURE_AVX5124FMAPS,
119 FEATURE_AVX512VPOPCNTDQ,
124 FEATURE_AVX512BITALG,
128 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
129 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
130 // support. Consequently, for i386, the presence of CPUID is checked first
131 // via the corresponding eflags bit.
//
// Reports whether the CPUID instruction may be used. __cpu_indicator_init
// tests the negated result before it issues any CPUID query.
// On i386 with GCC/Clang the inline asm keeps a copy of a register value,
// flips bit 21 (0x00200000) in the other copy and compares the two; the
// outcome is delivered in __cpuid_supported. Per the comment above, the value
// being probed is EFLAGS.
132 static bool isCpuIdSupported() {
133 #if defined(__GNUC__) || defined(__clang__)
134 #if defined(__i386__)
135 int __cpuid_supported;
138 " movl %%eax,%%ecx\n"
139 " xorl $0x00200000,%%eax\n"
145 " cmpl %%eax,%%ecx\n"
149 : "=r"(__cpuid_supported)
152 if (!__cpuid_supported)
160 // This code is copied from lib/Support/Host.cpp.
161 // Changes to either file should be mirrored in the other.
163 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
164 /// the specified arguments. If we can't run cpuid on the host, return true.
///
/// \param value  CPUID leaf to query (passed to __cpuid on the MSVC path).
/// \param rEAX   Receives the EAX result.
/// \param rEBX   Receives the EBX result.
/// \param rECX   Receives the ECX result.
/// \param rEDX   Receives the EDX result.
/// Callers in this file treat a false return as success, e.g.
/// `!getX86CpuIDAndInfo(0x80000001, ...)`.
165 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
166 unsigned *rECX, unsigned *rEDX) {
167 #if defined(__GNUC__) || defined(__clang__)
168 #if defined(__x86_64__)
169 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
170 // FIXME: should we save this for Clang?
// rbx is parked in rsi and swapped back afterwards, so the EBX result is
// read from rsi: that is why *rEBX is bound to the "S" constraint.
171 __asm__("movq\t%%rbx, %%rsi\n\t"
173 "xchgq\t%%rbx, %%rsi\n\t"
174 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
177 #elif defined(__i386__)
// Same save/swap scheme as above, using the 32-bit registers.
178 __asm__("movl\t%%ebx, %%esi\n\t"
180 "xchgl\t%%ebx, %%esi\n\t"
181 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
187 #elif defined(_MSC_VER)
188 // The MSVC intrinsic is portable across x86 and x64.
190 __cpuid(registers, value);
// registers[0..3] are copied out in EAX, EBX, ECX, EDX order.
191 *rEAX = registers[0];
192 *rEBX = registers[1];
193 *rECX = registers[2];
194 *rEDX = registers[3];
201 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
202 /// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true (callers in this file test the negated result for success).
///
/// \param value    CPUID leaf; placed in EAX by the inline-asm paths.
/// \param subleaf  CPUID subleaf; placed in ECX by the inline-asm paths.
/// \param rEAX     Receives the EAX result.
/// \param rEBX     Receives the EBX result.
/// \param rECX     Receives the ECX result.
204 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
205 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
207 #if defined(__GNUC__) || defined(__clang__)
208 #if defined(__x86_64__)
209 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
210 // FIXME: should we save this for Clang?
// rbx is parked in rsi and swapped back afterwards, so the EBX result is
// read from rsi ("S" constraint).
211 __asm__("movq\t%%rbx, %%rsi\n\t"
213 "xchgq\t%%rbx, %%rsi\n\t"
214 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
215 : "a"(value), "c"(subleaf));
217 #elif defined(__i386__)
// Same save/swap scheme as above, using the 32-bit registers.
218 __asm__("movl\t%%ebx, %%esi\n\t"
220 "xchgl\t%%ebx, %%esi\n\t"
221 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
222 : "a"(value), "c"(subleaf));
227 #elif defined(_MSC_VER)
// MSVC path: the intrinsic fills registers[0..3] in EAX, EBX, ECX, EDX order.
229 __cpuidex(registers, value, subleaf);
230 *rEAX = registers[0];
231 *rEBX = registers[1];
232 *rECX = registers[2];
233 *rEDX = registers[3];
240 // Read control register 0 (XCR0). Used to detect features such as AVX.
// The value is returned in two 32-bit halves through *rEAX and *rEDX.
// getAvailableFeatures treats a false return as success
// (`!getX86XCR0(&EAX, &EDX)`).
241 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
242 #if defined(__GNUC__) || defined(__clang__)
243 // Check xgetbv; this uses a .byte sequence instead of the instruction
244 // directly because older assemblers do not include support for xgetbv and
245 // there is no easy way to conditionally compile based on the assembler used.
// ECX is loaded with 0 as the instruction's input; results come back in
// EAX and EDX.
246 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
248 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
249 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// Upper 32 bits of the 64-bit intrinsic result.
251 *rEDX = Result >> 32;
// Decode the CPU family and model from EAX. __cpu_indicator_init passes the
// EAX value returned by CPUID leaf 1. The results are written through
// *Family and *Model.
258 static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
260 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
261 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7
262 if (*Family == 6 || *Family == 0xf) {
264 // Examine extended family ID if family ID is F.
265 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
266 // Examine extended model ID if family ID is 6 or F.
// The extended model ID becomes the high nibble of the model number.
267 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
// Classify an Intel CPU. Each case group below stores a processor type in
// *Type and, for the Core i7 groups, a subtype in *Subtype.
// __cpu_indicator_init points these at __cpu_model.__cpu_type and
// __cpu_model.__cpu_subtype. The case labels appear to be family-6 model
// numbers (see the per-case comments and the default label).
// Features2 is consulted to tell cascadelake apart from skylake-avx512.
271 static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
274 unsigned Features2, unsigned *Type,
281 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
282 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
283 // mobile processor, Intel Core 2 Extreme processor, Intel
284 // Pentium Dual-Core processor, Intel Xeon processor, model
285 // 0Fh. All processors are manufactured using the 65 nm process.
286 case 0x16: // Intel Celeron processor model 16h. All processors are
287 // manufactured using the 65 nm process
288 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
289 // 17h. All processors are manufactured using the 45 nm process.
291 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
292 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
293 // the 45 nm process.
294 *Type = INTEL_CORE2; // "penryn"
296 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
297 // processors are manufactured using the 45 nm process.
298 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
299 // As found in a Summer 2010 model iMac.
301 case 0x2e: // Nehalem EX
302 *Type = INTEL_COREI7; // "nehalem"
303 *Subtype = INTEL_COREI7_NEHALEM;
305 case 0x25: // Intel Core i7, laptop version.
306 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
307 // processors are manufactured using the 32 nm process.
308 case 0x2f: // Westmere EX
309 *Type = INTEL_COREI7; // "westmere"
310 *Subtype = INTEL_COREI7_WESTMERE;
312 case 0x2a: // Intel Core i7 processor. All processors are manufactured
313 // using the 32 nm process.
315 *Type = INTEL_COREI7; //"sandybridge"
316 *Subtype = INTEL_COREI7_SANDYBRIDGE;
319 case 0x3e: // Ivy Bridge EP
320 *Type = INTEL_COREI7; // "ivybridge"
321 *Subtype = INTEL_COREI7_IVYBRIDGE;
329 *Type = INTEL_COREI7; // "haswell"
330 *Subtype = INTEL_COREI7_HASWELL;
338 *Type = INTEL_COREI7; // "broadwell"
339 *Subtype = INTEL_COREI7_BROADWELL;
343 case 0x4e: // Skylake mobile
344 case 0x5e: // Skylake desktop
345 case 0x8e: // Kaby Lake mobile
346 case 0x9e: // Kaby Lake desktop
347 *Type = INTEL_COREI7; // "skylake"
348 *Subtype = INTEL_COREI7_SKYLAKE;
353 *Type = INTEL_COREI7;
// Features2 is indexed from feature 32 upward, hence the "- 32". The
// AVX512VNNI bit distinguishes cascadelake from skylake-avx512.
354 if (Features2 & (1 << (FEATURE_AVX512VNNI - 32)))
355 *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake"
357 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
362 *Type = INTEL_COREI7;
363 *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
369 *Type = INTEL_COREI7;
370 *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
376 *Type = INTEL_COREI7;
377 *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
380 case 0x1c: // Most 45 nm Intel Atom processors
381 case 0x26: // 45 nm Atom Lincroft
382 case 0x27: // 32 nm Atom Medfield
383 case 0x35: // 32 nm Atom Midview
384 case 0x36: // 32 nm Atom Midview
385 *Type = INTEL_BONNELL;
388 // Atom Silvermont codes from the Intel software optimization guide.
394 case 0x4c: // really airmont
395 *Type = INTEL_SILVERMONT;
396 break; // "silvermont"
398 case 0x5c: // Apollo Lake
399 case 0x5f: // Denverton
400 *Type = INTEL_GOLDMONT;
403 *Type = INTEL_GOLDMONT_PLUS;
406 *Type = INTEL_TREMONT;
410 *Type = INTEL_KNL; // knl
414 *Type = INTEL_KNM; // knm
417 default: // Unknown family 6 CPU.
// Classify an AMD CPU. A processor type is stored in *Type and, where the
// Model value falls in one of the ranges tested below, a subtype in *Subtype.
// __cpu_indicator_init points these at __cpu_model.__cpu_type and
// __cpu_model.__cpu_subtype.
426 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
427 unsigned Features, unsigned Features2,
428 unsigned *Type, unsigned *Subtype) {
429 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
430 // appears to be no way to generate the wide variety of AMD-specific targets
431 // from the information returned from CPUID.
434 *Type = AMDFAM10H; // "amdfam10"
437 *Subtype = AMDFAM10H_BARCELONA;
440 *Subtype = AMDFAM10H_SHANGHAI;
443 *Subtype = AMDFAM10H_ISTANBUL;
// Model ranges are tested from the newest core downward; each match records
// its subtype and leaves the switch.
452 if (Model >= 0x60 && Model <= 0x7f) {
453 *Subtype = AMDFAM15H_BDVER4;
454 break; // "bdver4"; 60h-7Fh: Excavator
456 if (Model >= 0x30 && Model <= 0x3f) {
457 *Subtype = AMDFAM15H_BDVER3;
458 break; // "bdver3"; 30h-3Fh: Steamroller
460 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
461 *Subtype = AMDFAM15H_BDVER2;
462 break; // "bdver2"; 02h, 10h-1Fh: Piledriver
465 *Subtype = AMDFAM15H_BDVER1;
466 break; // "bdver1"; 00h-0Fh: Bulldozer
474 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
475 *Subtype = AMDFAM17H_ZNVER2;
476 break; // "znver2"; 30h-3fh, 71h: Zen2
479 *Subtype = AMDFAM17H_ZNVER1;
480 break; // "znver1"; 00h-0Fh: Zen1
// Build the two feature bitmasks.
//   ECX, EDX      - register values from CPUID leaf 1, as passed by
//                   __cpu_indicator_init.
//   MaxLeaf       - EAX value from CPUID leaf 0; used here to gate the leaf 7
//                   queries.
//   FeaturesOut   - receives the Features mask.
//   Features2Out  - receives the Features2 mask.
// The EAX/EBX/ECX/EDX variables are reused as scratch for every later CPUID
// and XCR0 query, so each group of tests refers to the most recent query.
488 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
489 unsigned *FeaturesOut,
490 unsigned *Features2Out) {
491 unsigned Features = 0;
492 unsigned Features2 = 0;
// setFeature(F): record feature index F as bit (F & 0x1f) of Features or as
// bit ((F - 32) & 0x1f) of Features2.
495 #define setFeature(F) \
498 Features |= 1U << (F & 0x1f); \
500 Features2 |= 1U << ((F - 32) & 0x1f); \
504 setFeature(FEATURE_CMOV);
506 setFeature(FEATURE_MMX);
508 setFeature(FEATURE_SSE);
510 setFeature(FEATURE_SSE2);
513 setFeature(FEATURE_SSE3);
515 setFeature(FEATURE_PCLMUL);
517 setFeature(FEATURE_SSSE3);
519 setFeature(FEATURE_FMA);
521 setFeature(FEATURE_SSE4_1);
523 setFeature(FEATURE_SSE4_2);
525 setFeature(FEATURE_POPCNT);
527 setFeature(FEATURE_AES);
529 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
530 // indicates that the AVX registers will be saved and restored on context
531 // switch, then we have full AVX support.
// Note: getX86XCR0 writes into EAX and into the EDX parameter, so the leaf-1
// EDX value is no longer available after this point.
532 const unsigned AVXBits = (1 << 27) | (1 << 28);
533 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
534 ((EAX & 0x6) == 0x6);
535 #if defined(__APPLE__)
536 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
537 // save the AVX512 context if we use AVX512 instructions, even if the bit is
// not currently set.
539 bool HasAVX512Save = true;
541 // AVX512 requires additional context to be saved by the OS.
// 0xe0 selects bits 5-7 of the XCR0 low word read above.
542 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
546 setFeature(FEATURE_AVX);
549 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
// Leaf 7 subleaf 0, EBX bits.
551 if (HasLeaf7 && ((EBX >> 3) & 1))
552 setFeature(FEATURE_BMI);
553 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
554 setFeature(FEATURE_AVX2);
555 if (HasLeaf7 && ((EBX >> 8) & 1))
556 setFeature(FEATURE_BMI2);
557 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
558 setFeature(FEATURE_AVX512F);
559 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
560 setFeature(FEATURE_AVX512DQ);
561 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
562 setFeature(FEATURE_AVX512IFMA);
563 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
564 setFeature(FEATURE_AVX512PF);
565 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
566 setFeature(FEATURE_AVX512ER);
567 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
568 setFeature(FEATURE_AVX512CD);
569 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
570 setFeature(FEATURE_AVX512BW);
571 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
572 setFeature(FEATURE_AVX512VL);
// Leaf 7 subleaf 0, ECX bits.
574 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
575 setFeature(FEATURE_AVX512VBMI);
576 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
577 setFeature(FEATURE_AVX512VBMI2);
578 if (HasLeaf7 && ((ECX >> 8) & 1))
579 setFeature(FEATURE_GFNI);
580 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
581 setFeature(FEATURE_VPCLMULQDQ);
582 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
583 setFeature(FEATURE_AVX512VNNI);
584 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
585 setFeature(FEATURE_AVX512BITALG);
586 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
587 setFeature(FEATURE_AVX512VPOPCNTDQ);
// Leaf 7 subleaf 0, EDX bits.
589 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
590 setFeature(FEATURE_AVX5124VNNIW);
591 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
592 setFeature(FEATURE_AVX5124FMAPS);
// Leaf 7 subleaf 1: only EAX bit 5 (AVX512BF16) is examined.
594 bool HasLeaf7Subleaf1 =
595 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
596 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
597 setFeature(FEATURE_AVX512BF16);
// Extended leaves: the EAX result of leaf 0x80000000 is kept in MaxExtLevel
// and gates the query of leaf 0x80000001.
// NOTE(review): the return value of the 0x80000000 query is ignored; if that
// call could fail, MaxExtLevel would be read uninitialized - confirm.
599 unsigned MaxExtLevel;
600 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
602 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
603 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
604 if (HasExtLeaf1 && ((ECX >> 6) & 1))
605 setFeature(FEATURE_SSE4_A);
606 if (HasExtLeaf1 && ((ECX >> 11) & 1))
607 setFeature(FEATURE_XOP);
608 if (HasExtLeaf1 && ((ECX >> 16) & 1))
609 setFeature(FEATURE_FMA4);
// Publish the accumulated masks to the caller.
611 *FeaturesOut = Features;
612 *Features2Out = Features2;
// CONSTRUCTOR_ATTRIBUTE is applied to __cpu_indicator_init so that it runs
// automatically at program start-up.
// With HAVE_INIT_PRIORITY the constructor is given priority 101. The priority
// is an argument of the attribute and is therefore written in parentheses:
// __constructor__(101).
616 #if defined(HAVE_INIT_PRIORITY)
617 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
618 #elif __has_attribute(__constructor__)
// No priority support: fall back to a plain constructor.
619 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
621 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
622 // this runs during initialization.
// Fallback: the macro expands to nothing.
623 #define CONSTRUCTOR_ATTRIBUTE
// Forward declaration of the initializer, carrying hidden visibility and
// CONSTRUCTOR_ATTRIBUTE.
627 __attribute__((visibility("hidden")))
629 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
// Global CPU description filled in by __cpu_indicator_init. It starts out
// all-zero; a non-zero __cpu_vendor is what __cpu_indicator_init checks to
// decide that detection has already run.
632 __attribute__((visibility("hidden")))
634 struct __processor_model {
// Vendor code (VENDOR_INTEL, VENDOR_AMD or VENDOR_OTHER).
635 unsigned int __cpu_vendor;
// Processor type written by the vendor-specific detection routine.
636 unsigned int __cpu_type;
// Processor subtype written by the vendor-specific detection routine.
637 unsigned int __cpu_subtype;
// Element 0 receives the Features mask from getAvailableFeatures.
638 unsigned int __cpu_features[1];
639 } __cpu_model = {0, 0, 0, {0}};
// Second feature word: receives the Features2 mask computed by
// getAvailableFeatures (assigned in __cpu_indicator_init).
642 __attribute__((visibility("hidden")))
644 unsigned int __cpu_features2;
646 // A constructor function that sets __cpu_model and __cpu_features2 with
647 // the right values. This needs to run only once. This constructor is
648 // given the highest priority and it should run before constructors without
649 // the priority set. However, it still runs after ifunc initializers and
650 // needs to be called explicitly there.
//
// Sequence: query CPUID leaf 0 for the maximum leaf and vendor signature,
// query leaf 1 for family/model/brand and the base feature registers, compute
// the feature masks, then let the vendor-specific routine fill in the type
// and subtype.
652 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
653 unsigned EAX, EBX, ECX, EDX;
// Initial value only; overwritten by the leaf 0 query below.
654 unsigned MaxLeaf = 5;
656 unsigned Model, Family, Brand_id;
657 unsigned Features = 0;
658 unsigned Features2 = 0;
660 // This function needs to run just once.
// A non-zero vendor means an earlier call already populated __cpu_model.
661 if (__cpu_model.__cpu_vendor)
664 if (!isCpuIdSupported())
667 // Assume cpuid insn present. Run in level 0 to get vendor id.
// Leaf 0: EAX -> MaxLeaf, EBX -> Vendor. A failed query or MaxLeaf < 1 is
// recorded as VENDOR_OTHER.
668 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
669 __cpu_model.__cpu_vendor = VENDOR_OTHER;
// Leaf 1: EAX carries family/model, the low byte of EBX is kept as Brand_id,
// and ECX/EDX feed getAvailableFeatures. The return value is not checked here.
672 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
673 detectX86FamilyModel(EAX, &Family, &Model);
674 Brand_id = EBX & 0xff;
676 // Find available features.
677 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
678 __cpu_model.__cpu_features[0] = Features;
679 __cpu_features2 = Features2;
// Dispatch on the vendor signature; the vendor field is written after the
// type/subtype have been filled in.
681 if (Vendor == SIG_INTEL) {
683 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
684 Features2, &(__cpu_model.__cpu_type),
685 &(__cpu_model.__cpu_subtype));
686 __cpu_model.__cpu_vendor = VENDOR_INTEL;
687 } else if (Vendor == SIG_AMD) {
689 getAMDProcessorTypeAndSubtype(Family, Model, Features, Features2,
690 &(__cpu_model.__cpu_type),
691 &(__cpu_model.__cpu_subtype));
692 __cpu_model.__cpu_vendor = VENDOR_AMD;
694 __cpu_model.__cpu_vendor = VENDOR_OTHER;
// Sanity checks: every stored code must be below its enum's *_MAX sentinel.
696 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
697 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
698 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);