1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===------------------------------------------------------------===//
9 include "llvm/TableGen/SearchableTable.td"
10 include "llvm/Target/Target.td"
11 include "AMDGPUFeatures.td"
// Pointer value types p0..p6. Each record pairs an integer value type with a
// numeric second argument 0-6.
// NOTE(review): the arguments are presumably (scalar type, address space) as
// declared by PtrValueType in the included target definitions - confirm there.
def p0 : PtrValueType<i64, 0>;   // i64
def p1 : PtrValueType<i64, 1>;   // i64
def p2 : PtrValueType<i32, 2>;   // i32
def p3 : PtrValueType<i32, 3>;   // i32
def p4 : PtrValueType<i64, 4>;   // i64
def p5 : PtrValueType<i32, 5>;   // i32
def p6 : PtrValueType<i32, 6>;   // i32
22 class BoolToList<bit Value> {
23 list<int> ret = !if(Value, [1]<int>, []<int>);
26 //===------------------------------------------------------------===//
27 // Subtarget Features (device properties)
28 //===------------------------------------------------------------===//
30 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
33 "Assuming f32 fma is at least as fast as mul + add"
36 def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32",
39 "Enabling denormals does not cause f32 instructions to run at f64 rates"
42 def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
45 "Support 128-bit texture resources"
48 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
51 "Most fp64 instructions are half rate instead of quarter"
54 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
57 "Support flat address space"
60 def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets",
63 "Flat instructions have immediate offset addressing mode"
66 def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts",
69 "Have global_* flat memory instructions"
72 def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
75 "Have scratch_* flat memory instructions"
78 def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
79 "ScalarFlatScratchInsts",
81 "Have s_scratch_* flat memory instructions"
84 def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
87 "Have VALU add/sub instructions without carry out"
90 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
91 "UnalignedBufferAccess",
93 "Support unaligned global loads and stores"
96 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
99 "Trap handler support"
102 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
103 "UnalignedScratchAccess",
105 "Support unaligned scratch loads and stores"
108 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
111 "Has Memory Aperture Base and Size Registers"
114 def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts",
117 "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions"
120 def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
123 "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
126 def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support",
127 "DoesNotSupportXNACK",
129 "Hardware does not support XNACK"
132 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
133 // XNACK. The current default kernel driver setting is:
134 // - graphics ring: XNACK disabled
135 // - compute ring: XNACK enabled
137 // If XNACK is enabled, the VMEM latency can be worse.
138 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
139 def FeatureXNACK : SubtargetFeature<"xnack",
142 "Enable XNACK support"
145 def FeatureCuMode : SubtargetFeature<"cumode",
148 "Enable CU wavefront execution mode"
151 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
154 "VI SGPR initialization bug requiring a fixed SGPR allocation size"
157 def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
160 "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
163 def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug",
164 "HasMFMAInlineLiteralBug",
166 "MFMA cannot use inline literal as SrcC"
169 def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
170 "HasVcmpxPermlaneHazard",
175 def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
176 "HasVMEMtoScalarWriteHazard",
178 "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
181 def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
182 "HasSMEMtoVectorWriteHazard",
184 "s_load_dword followed by v_cmp page faults"
187 def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
188 "HasInstFwdPrefetchBug",
190 "S_INST_PREFETCH instruction causes shader to hang"
193 def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
194 "HasVcmpxExecWARHazard",
196 "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
199 def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
200 "HasLdsBranchVmemWARHazard",
202 "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
205 def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
208 "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
211 def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
212 "HasFlatSegmentOffsetBug",
214 "GFX10 bug, inst_offset ignored in flat segment"
217 def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
220 "Branch offset of 3f hardware bug"
223 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
224 "ldsbankcount"#Value,
226 !cast<string>(Value),
227 "The number of LDS banks per compute unit."
// Concrete LDS bank count features. SubtargetFeatureLDSBankCount appends the
// integer argument to the "ldsbankcount" feature-name prefix.
def FeatureLDSBankCount16
    : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32
    : SubtargetFeatureLDSBankCount<32>;
233 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
236 "Encoding format for VI"
239 def FeatureCIInsts : SubtargetFeature<"ci-insts",
242 "Additional instructions for CI+"
245 def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts",
248 "Additional instructions for GFX8+"
251 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
254 "Additional instructions for GFX9+"
257 def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
260 "Additional instructions for GFX10+"
263 def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts",
266 "Additional instructions for GFX10.3"
269 def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
272 "Instructions shared in GFX7, GFX8, GFX9"
275 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
278 "Has s_memrealtime instruction"
281 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
282 "HasInv2PiInlineImm",
284 "Has 1 / (2 * pi) as inline immediate"
287 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
290 "Has i16/f16 instructions"
293 def FeatureVOP3P : SubtargetFeature<"vop3p",
296 "Has VOP3P packed instructions"
299 def FeatureMovrel : SubtargetFeature<"movrel",
302 "Has v_movrel*_b32 instructions"
305 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
308 "Has VGPR mode register indexing"
311 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
314 "Has store scalar memory instructions"
317 def FeatureScalarAtomics : SubtargetFeature<"scalar-atomics",
320 "Has atomic scalar memory instructions"
323 def FeatureSDWA : SubtargetFeature<"sdwa",
326 "Support SDWA (Sub-DWORD Addressing) extension"
329 def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod",
332 "Support OMod with SDWA (Sub-DWORD Addressing) extension"
335 def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar",
338 "Support scalar register with SDWA (Sub-DWORD Addressing) extension"
341 def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst",
344 "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension"
347 def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
350 "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
353 def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
354 "HasSDWAOutModsVOPC",
356 "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
359 def FeatureDPP : SubtargetFeature<"dpp",
362 "Support DPP (Data Parallel Primitives) extension"
// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
366 def FeatureDPP8 : SubtargetFeature<"dpp8",
369 "Support DPP8 (Data Parallel Primitives) extension"
372 def FeatureR128A16 : SubtargetFeature<"r128-a16",
375 "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128"
378 def FeatureGFX10A16 : SubtargetFeature<"a16",
381 "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
384 def FeatureG16 : SubtargetFeature<"g16",
387 "Support G16 for 16-bit gradient image operands"
390 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
393 "Support NSA encoding for image instructions"
396 def FeatureGFX10_BEncoding : SubtargetFeature<"gfx10_b-encoding",
399 "Encoding format GFX10_B"
402 def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
405 "Support clamp for integer destination"
408 def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
409 "HasUnpackedD16VMem",
411 "Has unpacked d16 vmem instructions"
414 def FeatureDLInsts : SubtargetFeature<"dl-insts",
417 "Has v_fmac_f32 and v_xnor_b32 instructions"
420 def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
423 "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
426 def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
429 "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
432 def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
435 "Has v_dot8c_i32_i4 instruction"
438 def FeatureDot4Insts : SubtargetFeature<"dot4-insts",
441 "Has v_dot2c_i32_i16 instruction"
444 def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
447 "Has v_dot2c_f32_f16 instruction"
450 def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
453 "Has v_dot4c_i32_i8 instruction"
456 def FeatureMAIInsts : SubtargetFeature<"mai-insts",
459 "Has mAI instructions"
462 def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
465 "Has v_pk_fmac_f16 instruction"
468 def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
469 "HasAtomicFaddInsts",
471 "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
472 "global_atomic_pk_add_f16 instructions",
473 [FeatureFlatGlobalInsts]
476 def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
477 "DoesNotSupportSRAMECC",
479 "Hardware does not support SRAM ECC"
482 def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
488 def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
491 "V_CMPX does not write VCC/SGPR in addition to EXEC"
494 def FeatureVscnt : SubtargetFeature<"vscnt",
497 "Has separate store vscnt counter"
500 def FeatureGetWaveIdInst : SubtargetFeature<"get-wave-id-inst",
503 "Has s_get_waveid_in_workgroup instruction"
506 def FeatureSMemTimeInst : SubtargetFeature<"s-memtime-inst",
509 "Has s_memtime instruction"
512 def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts",
515 "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions"
518 def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts",
521 "Has ds_*_src2 instructions"
524 def FeatureRegisterBanking : SubtargetFeature<"register-banking",
525 "HasRegisterBanking",
527 "Has register banking"
530 def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
533 "Can use one literal in VOP3"
536 def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
537 "HasNoDataDepHazard",
539 "Does not need SW waitstates"
542 //===------------------------------------------------------------===//
543 // Subtarget Features (options and debugging)
544 //===------------------------------------------------------------===//
546 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
547 "max-private-element-size-"#size,
548 "MaxPrivateElementSize",
550 "Maximum private access size may be "#size
// Concrete maximum-private-element-size features for sizes 4, 8 and 16.
// FeatureMaxPrivateElementSize appends the size to the
// "max-private-element-size-" feature-name prefix.
def FeatureMaxPrivateElementSize4
    : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8
    : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16
    : FeatureMaxPrivateElementSize<16>;
557 def FeatureDumpCode : SubtargetFeature <"DumpCode",
560 "Dump MachineInstrs in the CodeEmitter"
563 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
566 "Dump MachineInstrs in the CodeEmitter"
569 // XXX - This should probably be removed once enabled by default
570 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
571 "EnableLoadStoreOpt",
573 "Enable SI load/store optimizer pass"
576 // Performance debugging feature. Allow using DS instruction immediate
577 // offsets even if the base pointer can't be proven to be base. On SI,
578 // base pointer values that won't give the same result as a 16-bit add
579 // are not safe to fold, but this will override the conservative test
580 // for the base pointer.
581 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
582 "unsafe-ds-offset-folding",
583 "EnableUnsafeDSOffsetFolding",
585 "Force using DS instruction immediate offsets on SI"
588 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
591 "Enable SI Machine Scheduler"
594 def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
597 "Use ds_{read|write}_b128"
600 // Sparse texture support requires that all result registers are zeroed when
601 // PRTStrictNull is set to true. This feature is turned on for all architectures
602 // but is enabled as a feature in case there are situations where PRTStrictNull
603 // is disabled by the driver.
604 def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null",
605 "EnablePRTStrictNull",
607 "Enable zeroing of result registers for sparse texture fetches"
610 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
611 // all OS-es on VI and newer hardware to avoid assertion failures due
612 // to missing ADDR64 variants of MUBUF instructions.
613 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF
616 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
619 "Force to generate flat instruction for global"
622 def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
623 "auto-waitcnt-before-barrier",
624 "AutoWaitcntBeforeBarrier",
626 "Hardware automatically inserts waitcnt before barrier"
629 def FeatureCodeObjectV3 : SubtargetFeature <
633 "Generate code object version 3"
636 def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
637 "HasTrigReducedRange",
639 "Requires use of fract on arguments to trig instructions"
642 // Dummy feature used to disable assembler instructions.
643 def FeatureDisable : SubtargetFeature<"",
644 "FeatureDisable","true",
645 "Dummy feature to disable assembler instructions"
648 class GCNSubtargetFeatureGeneration <string Value,
650 list<SubtargetFeature> Implies> :
651 SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>;
653 def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
655 [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
656 FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
657 FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
658 FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
659 FeatureDoesNotSupportXNACK]
662 def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
664 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
665 FeatureWavefrontSize64, FeatureFlatAddressSpace,
666 FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
667 FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
668 FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
671 def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
673 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
674 FeatureWavefrontSize64, FeatureFlatAddressSpace,
675 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
676 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
677 FeatureScalarStores, FeatureInv2PiInlineImm,
678 FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
679 FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
680 FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
681 FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
685 def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
687 [FeatureFP64, FeatureLocalMemorySize65536,
688 FeatureWavefrontSize64, FeatureFlatAddressSpace,
689 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
690 FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
691 FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
692 FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
693 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
694 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
695 FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
696 FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
697 FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
698 FeatureFastDenormalF32
702 def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
704 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
705 FeatureFlatAddressSpace,
706 FeatureCIInsts, Feature16BitInsts,
707 FeatureSMemRealTime, FeatureInv2PiInlineImm,
708 FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
709 FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
710 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
711 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
712 FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
713 FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
714 FeatureVOP3Literal, FeatureDPP8,
715 FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
716 FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16
720 class FeatureSet<list<SubtargetFeature> Features_> {
721 list<SubtargetFeature> Features = Features_;
724 def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands,
727 FeatureLDSBankCount32,
728 FeatureDoesNotSupportXNACK,
729 FeatureCodeObjectV3]>;
// Feature set for ISA version 6.0.1: the Southern Islands generation features
// plus 32 LDS banks, no XNACK support, and code object version 3.
def FeatureISAVersion6_0_1 : FeatureSet<[
  FeatureSouthernIslands,
  FeatureLDSBankCount32,
  FeatureDoesNotSupportXNACK,
  FeatureCodeObjectV3
]>;
737 def FeatureISAVersion7_0_0 : FeatureSet<
739 FeatureLDSBankCount32,
740 FeatureDoesNotSupportXNACK,
741 FeatureCodeObjectV3]>;
743 def FeatureISAVersion7_0_1 : FeatureSet<
746 FeatureLDSBankCount32,
748 FeatureDoesNotSupportXNACK,
749 FeatureCodeObjectV3]>;
751 def FeatureISAVersion7_0_2 : FeatureSet<
753 FeatureLDSBankCount16,
755 FeatureDoesNotSupportXNACK,
756 FeatureCodeObjectV3]>;
758 def FeatureISAVersion7_0_3 : FeatureSet<
760 FeatureLDSBankCount16,
761 FeatureDoesNotSupportXNACK,
762 FeatureCodeObjectV3]>;
764 def FeatureISAVersion7_0_4 : FeatureSet<
766 FeatureLDSBankCount32,
767 FeatureDoesNotSupportXNACK,
768 FeatureCodeObjectV3]>;
770 def FeatureISAVersion8_0_1 : FeatureSet<
771 [FeatureVolcanicIslands,
774 FeatureLDSBankCount32,
776 FeatureUnpackedD16VMem,
777 FeatureCodeObjectV3]>;
779 def FeatureISAVersion8_0_2 : FeatureSet<
780 [FeatureVolcanicIslands,
781 FeatureLDSBankCount32,
783 FeatureUnpackedD16VMem,
784 FeatureDoesNotSupportXNACK,
785 FeatureCodeObjectV3]>;
// Feature set for ISA version 8.0.3: the Volcanic Islands generation features
// plus 32 LDS banks, unpacked d16 vmem instructions, no XNACK support, and
// code object version 3.
def FeatureISAVersion8_0_3 : FeatureSet<[
  FeatureVolcanicIslands,
  FeatureLDSBankCount32,
  FeatureUnpackedD16VMem,
  FeatureDoesNotSupportXNACK,
  FeatureCodeObjectV3
]>;
794 def FeatureISAVersion8_1_0 : FeatureSet<
795 [FeatureVolcanicIslands,
796 FeatureLDSBankCount16,
798 FeatureCodeObjectV3]>;
800 def FeatureISAVersion9_0_0 : FeatureSet<
803 FeatureLDSBankCount32,
805 FeatureDoesNotSupportXNACK,
806 FeatureDoesNotSupportSRAMECC]>;
808 def FeatureISAVersion9_0_2 : FeatureSet<
811 FeatureLDSBankCount32,
813 FeatureDoesNotSupportSRAMECC,
814 FeatureCodeObjectV3]>;
816 def FeatureISAVersion9_0_4 : FeatureSet<
818 FeatureLDSBankCount32,
820 FeatureDoesNotSupportXNACK,
821 FeatureDoesNotSupportSRAMECC,
822 FeatureCodeObjectV3]>;
824 def FeatureISAVersion9_0_6 : FeatureSet<
828 FeatureLDSBankCount32,
832 FeatureDoesNotSupportXNACK,
833 FeatureCodeObjectV3]>;
835 def FeatureISAVersion9_0_8 : FeatureSet<
839 FeatureLDSBankCount32,
848 FeaturePkFmacF16Inst,
849 FeatureAtomicFaddInsts,
851 FeatureMFMAInlineLiteralBug,
852 FeatureCodeObjectV3]>;
854 def FeatureISAVersion9_0_9 : FeatureSet<
857 FeatureLDSBankCount32,
859 FeatureCodeObjectV3]>;
861 // TODO: Organize more features into groups.
863 // Bugs present on gfx10.1.
864 list<SubtargetFeature> GFX10_1_Bugs = [
865 FeatureVcmpxPermlaneHazard,
866 FeatureVMEMtoScalarWriteHazard,
867 FeatureSMEMtoVectorWriteHazard,
868 FeatureInstFwdPrefetchBug,
869 FeatureVcmpxExecWARHazard,
870 FeatureLdsBranchVmemWARHazard,
873 FeatureFlatSegmentOffsetBug
877 def FeatureISAVersion10_1_0 : FeatureSet<
878 !listconcat(FeatureGroup.GFX10_1_Bugs,
880 FeatureLDSBankCount32,
883 FeatureWavefrontSize32,
885 FeatureScalarAtomics,
886 FeatureScalarFlatScratchInsts,
887 FeatureGetWaveIdInst,
889 FeatureMadMacF32Insts,
891 FeatureLdsMisalignedBug,
892 FeatureDoesNotSupportXNACK,
893 FeatureCodeObjectV3])>;
895 def FeatureISAVersion10_1_1 : FeatureSet<
896 !listconcat(FeatureGroup.GFX10_1_Bugs,
898 FeatureLDSBankCount32,
905 FeatureWavefrontSize32,
907 FeatureScalarAtomics,
908 FeatureScalarFlatScratchInsts,
909 FeatureGetWaveIdInst,
911 FeatureMadMacF32Insts,
913 FeatureDoesNotSupportXNACK,
914 FeatureCodeObjectV3])>;
916 def FeatureISAVersion10_1_2 : FeatureSet<
917 !listconcat(FeatureGroup.GFX10_1_Bugs,
919 FeatureLDSBankCount32,
926 FeatureWavefrontSize32,
928 FeatureScalarAtomics,
929 FeatureScalarFlatScratchInsts,
930 FeatureGetWaveIdInst,
932 FeatureMadMacF32Insts,
934 FeatureLdsMisalignedBug,
935 FeatureDoesNotSupportXNACK,
936 FeatureCodeObjectV3])>;
938 def FeatureISAVersion10_3_0 : FeatureSet<
940 FeatureGFX10_BEncoding,
942 FeatureLDSBankCount32,
949 FeatureWavefrontSize32,
950 FeatureDoesNotSupportXNACK,
951 FeatureCodeObjectV3]>;
953 //===----------------------------------------------------------------------===//
955 def AMDGPUInstrInfo : InstrInfo {
956 let guessInstructionProperties = 1;
957 let noNamedPositionallyEncodedOperands = 1;
960 def AMDGPUAsmParser : AsmParser {
961 // Some of the R600 registers have the same name, so this crashes.
962 // For example T0_XYZW and T0_XY both have the asm name T0.
963 let ShouldEmitMatchRegisterName = 0;
966 def AMDGPUAsmWriter : AsmWriter {
967 int PassSubtarget = 1;
970 def AMDGPUAsmVariants {
971 string Default = "Default";
973 string VOP3 = "VOP3";
975 string SDWA = "SDWA";
977 string SDWA9 = "SDWA9";
981 string Disable = "Disable";
985 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
986 let Variant = AMDGPUAsmVariants.Default_ID;
987 let Name = AMDGPUAsmVariants.Default;
990 def VOP3AsmParserVariant : AsmParserVariant {
991 let Variant = AMDGPUAsmVariants.VOP3_ID;
992 let Name = AMDGPUAsmVariants.VOP3;
995 def SDWAAsmParserVariant : AsmParserVariant {
996 let Variant = AMDGPUAsmVariants.SDWA_ID;
997 let Name = AMDGPUAsmVariants.SDWA;
1000 def SDWA9AsmParserVariant : AsmParserVariant {
1001 let Variant = AMDGPUAsmVariants.SDWA9_ID;
1002 let Name = AMDGPUAsmVariants.SDWA9;
1006 def DPPAsmParserVariant : AsmParserVariant {
1007 let Variant = AMDGPUAsmVariants.DPP_ID;
1008 let Name = AMDGPUAsmVariants.DPP;
1011 def AMDGPU : Target {
1012 // Pull in Instruction Info:
1013 let InstructionSet = AMDGPUInstrInfo;
1014 let AssemblyParsers = [AMDGPUAsmParser];
1015 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
1016 VOP3AsmParserVariant,
1017 SDWAAsmParserVariant,
1018 SDWA9AsmParserVariant,
1019 DPPAsmParserVariant];
1020 let AssemblyWriters = [AMDGPUAsmWriter];
1021 let AllowRegisterRenaming = 1;
// Placeholder scheduling records for pseudo instructions: one dummy
// functional unit and one dummy instruction itinerary class.
def ALU_NULL
    : FuncUnit;
def NullALU
    : InstrItinClass;
1028 //===----------------------------------------------------------------------===//
1029 // Predicate helper class
1030 //===----------------------------------------------------------------------===//
1033 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
1034 AssemblerPredicate<(all_of FeatureSouthernIslands)>;
1037 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
1038 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
1039 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>;
// True when the subtarget generation is SOUTHERN_ISLANDS, SEA_ISLANDS or
// GFX10. The assembler side only requires that GCN3 encoding is absent.
def isGFX6GFX7GFX10
    : Predicate<
        "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
      AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>;
1048 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
1049 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>;
1052 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
1053 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
1054 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;
// True when the subtarget generation is SEA_ISLANDS, VOLCANIC_ISLANDS or
// GFX9. The assembler side keys off FeatureGFX7GFX8GFX9Insts.
def isGFX7GFX8GFX9
    : Predicate<
        "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
      AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>;
// True when the subtarget generation is SOUTHERN_ISLANDS, SEA_ISLANDS,
// VOLCANIC_ISLANDS or GFX9. The assembler side requires that
// FeatureGFX10Insts is absent.
def isGFX6GFX7GFX8GFX9
    : Predicate<
        "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
        "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
      AssemblerPredicate<(all_of (not FeatureGFX10Insts))>;
1070 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
1071 AssemblerPredicate<(all_of FeatureCIInsts)>;
1074 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
1075 AssemblerPredicate<(all_of FeatureGFX8Insts)>;
// True only when the subtarget generation is exactly VOLCANIC_ISLANDS. The
// assembler side requires FeatureVolcanicIslands.
def isGFX8Only
    : Predicate<"Subtarget->getGeneration() =="
                "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
      AssemblerPredicate<(all_of FeatureVolcanicIslands)>;
1082 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
1083 AssemblerPredicate<(all_of FeatureGFX9Insts)>;
// True only when the subtarget generation is exactly GFX9. The assembler side
// requires both GCN3 encoding and the GFX9 instruction feature.
def isGFX9Only
    : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
      AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>;
1090 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
1091 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1092 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>;
1095 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
1096 AssemblerPredicate<(all_of FeatureGFX10Insts)>;
// Subtarget reports flat address space support; the assembler side requires
// FeatureFlatAddressSpace.
def HasFlatAddressSpace
    : Predicate<"Subtarget->hasFlatAddressSpace()">,
      AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
// Flat memory instruction families. Each predicate pairs a subtarget query
// with the feature the assembler must see.

// global_* flat memory instructions.
def HasFlatGlobalInsts
    : Predicate<"Subtarget->hasFlatGlobalInsts()">,
      AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;

// scratch_* flat memory instructions.
def HasFlatScratchInsts
    : Predicate<"Subtarget->hasFlatScratchInsts()">,
      AssemblerPredicate<(all_of FeatureFlatScratchInsts)>;

// s_scratch_* flat memory instructions.
def HasScalarFlatScratchInsts
    : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
      AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>;

// D16 loads/stores; the assembler side is gated on FeatureGFX9Insts.
def HasD16LoadStore
    : Predicate<"Subtarget->hasD16LoadStore()">,
      AssemblerPredicate<(all_of FeatureGFX9Insts)>;
// Subtarget uses the GFX10_B encoding format.
def HasGFX10_BEncoding
    : Predicate<"Subtarget->hasGFX10_BEncoding()">,
      AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>;
// Unpacked vs. packed d16 vmem. The two predicates are exact complements:
// both query hasUnpackedD16VMem() / FeatureUnpackedD16VMem, one negated.
def HasUnpackedD16VMem
    : Predicate<"Subtarget->hasUnpackedD16VMem()">,
      AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>;

def HasPackedD16VMem
    : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
      AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;
// Subtarget reports that d16 operations preserve unused bits. The assembler
// side requires FeatureGFX9Insts with FeatureSRAMECC absent.
def D16PreservesUnusedBits
    : Predicate<"Subtarget->d16PreservesUnusedBits()">,
      AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;
// Complementary code-generation-only predicates (no assembler predicate) on
// the subtarget's ldsRequiresM0Init() query.
def LDSRequiresM0Init
    : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init
    : Predicate<"!Subtarget->ldsRequiresM0Init()">;
// True when the subtarget generation is GFX9 or later; the assembler side
// requires FeatureGFX9Insts.
def HasDSAddTid
    : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
      AssemblerPredicate<(all_of FeatureGFX9Insts)>;
// Subtarget reports LDS floating-point atomics; the assembler side is gated
// on FeatureGFX8Insts.
def HasLDSFPAtomics
    : Predicate<"Subtarget->hasLDSFPAtomics()">,
      AssemblerPredicate<(all_of FeatureGFX8Insts)>;
// VALU add/sub without carry out. The negated form has no assembler
// predicate attached.
def HasAddNoCarryInsts
    : Predicate<"Subtarget->hasAddNoCarry()">,
      AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;

def NotHasAddNoCarryInsts
    : Predicate<"!Subtarget->hasAddNoCarry()">;
// i16/f16 instructions.
def Has16BitInsts
    : Predicate<"Subtarget->has16BitInsts()">,
      AssemblerPredicate<(all_of Feature16BitInsts)>;

// VOP3P packed instructions.
def HasVOP3PInsts
    : Predicate<"Subtarget->hasVOP3PInsts()">,
      AssemblerPredicate<(all_of FeatureVOP3P)>;
// Complementary code-generation-only predicates (no assembler predicate) on
// the subtarget's supportsMinMaxDenormModes() query.
def HasMinMaxDenormModes
    : Predicate<"Subtarget->supportsMinMaxDenormModes()">;
def NotHasMinMaxDenormModes
    : Predicate<"!Subtarget->supportsMinMaxDenormModes()">;
// SDWA support. The assembler side requires both FeatureSDWA and
// FeatureVolcanicIslands.
def HasSDWA
    : Predicate<"Subtarget->hasSDWA()">,
      AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>;
1148 Predicate<"Subtarget->hasSDWA()">,
1149 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts,FeatureSDWA)>;
1152 Predicate<"Subtarget->hasSDWA()">,
1153 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureSDWA)>;
// DPP support. The assembler side requires GCN3 encoding together with
// FeatureDPP.
def HasDPP
    : Predicate<"Subtarget->hasDPP()">,
      AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>;
// DPP8 support. The assembler side requires non-GCN3 encoding, the GFX10
// instruction feature, and FeatureDPP8.
def HasDPP8
    : Predicate<"Subtarget->hasDPP8()">,
      AssemblerPredicate<(all_of (not FeatureGCN3Encoding),
                                 FeatureGFX10Insts,
                                 FeatureDPP8)>;
// 16-bit image operand support variants.

// gfx9-style A16 (a16 aliased with r128).
def HasR128A16
    : Predicate<"Subtarget->hasR128A16()">,
      AssemblerPredicate<(all_of FeatureR128A16)>;

// gfx10-style A16.
def HasGFX10A16
    : Predicate<"Subtarget->hasGFX10A16()">,
      AssemblerPredicate<(all_of FeatureGFX10A16)>;

// G16 for 16-bit gradient image operands.
def HasG16
    : Predicate<"Subtarget->hasG16()">,
      AssemblerPredicate<(all_of FeatureG16)>;
// Uses the same subtarget query as HasDPP, but the assembler side requires
// non-GCN3 encoding plus the GFX10 instruction feature alongside FeatureDPP.
def HasDPP16
    : Predicate<"Subtarget->hasDPP()">,
      AssemblerPredicate<(all_of (not FeatureGCN3Encoding),
                                 FeatureGFX10Insts,
                                 FeatureDPP)>;
// Single-feature predicates: each pairs one subtarget query with the matching
// assembler feature.

// Clamp for integer destinations.
def HasIntClamp
    : Predicate<"Subtarget->hasIntClamp()">,
      AssemblerPredicate<(all_of FeatureIntClamp)>;

// v_mad_mix* instructions.
def HasMadMixInsts
    : Predicate<"Subtarget->hasMadMixInsts()">,
      AssemblerPredicate<(all_of FeatureMadMixInsts)>;

// Scalar memory store instructions.
def HasScalarStores
    : Predicate<"Subtarget->hasScalarStores()">,
      AssemblerPredicate<(all_of FeatureScalarStores)>;

// Scalar memory atomic instructions.
def HasScalarAtomics
    : Predicate<"Subtarget->hasScalarAtomics()">,
      AssemblerPredicate<(all_of FeatureScalarAtomics)>;

// HasNoSdstCMPX / HasSdstCMPX are exact complements of each other.
def HasNoSdstCMPX
    : Predicate<"Subtarget->hasNoSdstCMPX()">,
      AssemblerPredicate<(all_of FeatureNoSdstCMPX)>;

def HasSdstCMPX
    : Predicate<"!Subtarget->hasNoSdstCMPX()">,
      AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>;
// LDS bank count checks; code-generation only (no assembler predicate).
def has16BankLDS
    : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS
    : Predicate<"Subtarget->getLDSBankCount() == 32">;

// VGPR mode register indexing.
def HasVGPRIndexMode
    : Predicate<"Subtarget->hasVGPRIndexMode()">,
      AssemblerPredicate<(all_of FeatureVGPRIndexMode)>;

// v_movrel*_b32 instructions.
def HasMovrel
    : Predicate<"Subtarget->hasMovrel()">,
      AssemblerPredicate<(all_of FeatureMovrel)>;
// Instruction-family predicates: each pairs one subtarget query with the
// matching assembler feature.

// v_fma_mix* instructions.
def HasFmaMixInsts
    : Predicate<"Subtarget->hasFmaMixInsts()">,
      AssemblerPredicate<(all_of FeatureFmaMixInsts)>;

// dl-insts feature.
def HasDLInsts
    : Predicate<"Subtarget->hasDLInsts()">,
      AssemblerPredicate<(all_of FeatureDLInsts)>;

// dot1-insts .. dot6-insts features.
def HasDot1Insts
    : Predicate<"Subtarget->hasDot1Insts()">,
      AssemblerPredicate<(all_of FeatureDot1Insts)>;

def HasDot2Insts
    : Predicate<"Subtarget->hasDot2Insts()">,
      AssemblerPredicate<(all_of FeatureDot2Insts)>;

def HasDot3Insts
    : Predicate<"Subtarget->hasDot3Insts()">,
      AssemblerPredicate<(all_of FeatureDot3Insts)>;

def HasDot4Insts
    : Predicate<"Subtarget->hasDot4Insts()">,
      AssemblerPredicate<(all_of FeatureDot4Insts)>;

def HasDot5Insts
    : Predicate<"Subtarget->hasDot5Insts()">,
      AssemblerPredicate<(all_of FeatureDot5Insts)>;

def HasDot6Insts
    : Predicate<"Subtarget->hasDot6Insts()">,
      AssemblerPredicate<(all_of FeatureDot6Insts)>;

// s_get_waveid_in_workgroup instruction.
def HasGetWaveIdInst
    : Predicate<"Subtarget->hasGetWaveIdInst()">,
      AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;

// mai-insts feature.
def HasMAIInsts
    : Predicate<"Subtarget->hasMAIInsts()">,
      AssemblerPredicate<(all_of FeatureMAIInsts)>;
// s_memtime instruction. The negated form is code-generation only.
def HasSMemTimeInst
    : Predicate<"Subtarget->hasSMemTimeInst()">,
      AssemblerPredicate<(all_of FeatureSMemTimeInst)>;

def HasNoSMemTimeInst
    : Predicate<"!Subtarget->hasSMemTimeInst()">;

// v_pk_fmac_f16 instruction.
def HasPkFmacF16Inst
    : Predicate<"Subtarget->hasPkFmacF16Inst()">,
      AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;

// v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions.
def HasMadMacF32Insts
    : Predicate<"Subtarget->hasMadMacF32Insts()">,
      AssemblerPredicate<(all_of FeatureMadMacF32Insts)>;

// Floating-point atomic add instructions.
def HasAtomicFaddInsts
    : Predicate<"Subtarget->hasAtomicFaddInsts()">,
      AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>;

// Complement of HasMadMacF32Insts on both the C++ and the assembler side.
def HasNoMadMacF32Insts
    : Predicate<"!Subtarget->hasMadMacF32Insts()">,
      AssemblerPredicate<(all_of (not FeatureMadMacF32Insts))>;
// Subtarget has the ds_*_src2 instructions.
// The C++ condition is the positive query so that it agrees with the record
// name and with the assembler predicate (which requires FeatureDsSrc2Insts).
// It was previously negated, making code generation and the assembler
// disagree about when the predicate holds.
def HasDsSrc2Insts : Predicate<"Subtarget->hasDsSrc2Insts()">,
  AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
// Subtarget is affected by the branch-offset-of-3f hardware bug.
// The C++ condition is the positive query so that it agrees with the record
// name and with the assembler predicate (which requires FeatureOffset3fBug).
// It was previously negated, making code generation and the assembler
// disagree about when the predicate holds.
def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
  AssemblerPredicate<(all_of FeatureOffset3fBug)>;
// Code-generation-only predicate whose C++ condition is the bare expression
// EnableLateStructurizeCFG (no subtarget query, no assembler predicate).
def EnableLateCFGStructurize
    : Predicate<"EnableLateStructurizeCFG">;
1254 // Include AMDGPU TD files
1255 include "SISchedule.td"
1256 include "GCNProcessors.td"
1257 include "AMDGPUInstrInfo.td"
1258 include "SIRegisterInfo.td"
1259 include "AMDGPURegisterBanks.td"
1260 include "AMDGPUInstructions.td"
1261 include "SIInstrInfo.td"
1262 include "AMDGPUCallingConv.td"
1263 include "AMDGPUSearchableTables.td"