1 //===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Wavefront-size predicates: codegen selects on the subtarget query, while
// the paired AssemblerPredicate gates assembly on the matching feature bit.
9 def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
10 AssemblerPredicate <"FeatureWavefrontSize32">;
11 def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
12 AssemblerPredicate <"FeatureWavefrontSize64">;
// Always-false predicate (paired with the FeatureDisable assembler
// predicate), so anything guarded by it is never available.
14 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
16 class GCNPredicateControl : PredicateControl {
17 Predicate SIAssemblerPredicate = isGFX6GFX7;
18 Predicate VIAssemblerPredicate = isGFX8GFX9;
21 // Except for the NONE field, this must be kept in sync with the
22 // SIEncodingFamily enum in AMDGPUInstrInfo.cpp
23 def SIEncodingFamily {
35 //===----------------------------------------------------------------------===//
37 //===----------------------------------------------------------------------===//
// Clamp node: a unary FP operation (type profile SDTFPUnaryOp).
39 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
41 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
42 SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
44 [SDNPMayLoad, SDNPMemOperand]
47 def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
48 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
49 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
52 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
53 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
56 def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
57 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
60 def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
61 SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
64 def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
65 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
68 def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
69 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
72 // load_d16_{lo|hi} ptr, tied_input
73 def SIload_d16 : SDTypeProfile<1, 2, [
79 def SDTtbuffer_load : SDTypeProfile<1, 8,
81 SDTCisVT<1, v4i32>, // rsrc
82 SDTCisVT<2, i32>, // vindex(VGPR)
83 SDTCisVT<3, i32>, // voffset(VGPR)
84 SDTCisVT<4, i32>, // soffset(SGPR)
85 SDTCisVT<5, i32>, // offset(imm)
86 SDTCisVT<6, i32>, // format(imm)
87 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
88 SDTCisVT<8, i1> // idxen(imm)
// Typed-buffer load-format node; chained memory load whose operand layout
// (rsrc/vindex/voffset/soffset/offset/format/cachepolicy/idxen) is given by
// the SDTtbuffer_load profile defined above.
91 def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
92 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
93 def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
95 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
97 def SDTtbuffer_store : SDTypeProfile<0, 9,
99 SDTCisVT<1, v4i32>, // rsrc
100 SDTCisVT<2, i32>, // vindex(VGPR)
101 SDTCisVT<3, i32>, // voffset(VGPR)
102 SDTCisVT<4, i32>, // soffset(SGPR)
103 SDTCisVT<5, i32>, // offset(imm)
104 SDTCisVT<6, i32>, // format(imm)
105 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
106 SDTCisVT<8, i1> // idxen(imm)
// Typed-buffer store-format node; chained memory store whose operand layout
// is given by the SDTtbuffer_store profile defined above.
109 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
110 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
111 def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
113 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
115 def SDTBufferLoad : SDTypeProfile<1, 7,
117 SDTCisVT<1, v4i32>, // rsrc
118 SDTCisVT<2, i32>, // vindex(VGPR)
119 SDTCisVT<3, i32>, // voffset(VGPR)
120 SDTCisVT<4, i32>, // soffset(SGPR)
121 SDTCisVT<5, i32>, // offset(imm)
122 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
123 SDTCisVT<7, i1>]>; // idxen(imm)
// Buffer load nodes.  All share the SDTBufferLoad operand profile
// (rsrc, vindex, voffset, soffset, offset, cachepolicy, idxen) and are
// chained memory loads.  The ubyte/ushort vs. byte/short variants are the
// sub-dword forms (zero- vs. sign-extending, per the U/S naming).
125 def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
126 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
127 def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
128 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
129 def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
130 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
131 def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
132 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
133 def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
134 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
135 def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
136 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
137 def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
139 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
141 def SDTBufferStore : SDTypeProfile<0, 8,
143 SDTCisVT<1, v4i32>, // rsrc
144 SDTCisVT<2, i32>, // vindex(VGPR)
145 SDTCisVT<3, i32>, // voffset(VGPR)
146 SDTCisVT<4, i32>, // soffset(SGPR)
147 SDTCisVT<5, i32>, // offset(imm)
148 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
149 SDTCisVT<7, i1>]>; // idxen(imm)
// Buffer store node; chained memory store using the SDTBufferStore
// operand profile defined above.
151 def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
152 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
153 def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
155 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
156 def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
158 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
159 def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
161 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
162 def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
164 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
166 class SDBufferAtomic<string opcode> : SDNode <opcode,
168 [SDTCisVT<2, v4i32>, // rsrc
169 SDTCisVT<3, i32>, // vindex(VGPR)
170 SDTCisVT<4, i32>, // voffset(VGPR)
171 SDTCisVT<5, i32>, // soffset(SGPR)
172 SDTCisVT<6, i32>, // offset(imm)
173 SDTCisVT<7, i32>, // cachepolicy(imm)
174 SDTCisVT<8, i1>]>, // idxen(imm)
175 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
178 class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
180 [SDTCisVT<0, ty>, // vdata
181 SDTCisVT<1, v4i32>, // rsrc
182 SDTCisVT<2, i32>, // vindex(VGPR)
183 SDTCisVT<3, i32>, // voffset(VGPR)
184 SDTCisVT<4, i32>, // soffset(SGPR)
185 SDTCisVT<5, i32>, // offset(imm)
186 SDTCisVT<6, i32>, // cachepolicy(imm)
187 SDTCisVT<7, i1>]>, // idxen(imm)
188 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
// Buffer atomic RMW nodes.  Integer ops instantiate SDBufferAtomic; the FP
// add variants instantiate SDBufferAtomicNoRtn ("no return", per the class
// name) with an explicit vdata type (f32 scalar / v2f16 packed).
191 def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
192 def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
193 def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
194 def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
195 def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
196 def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
197 def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
198 def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
199 def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
200 def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
201 def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
202 def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
203 def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
204 def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
206 def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
208 [SDTCisVT<0, i32>, // dst
209 SDTCisVT<1, i32>, // src
210 SDTCisVT<2, i32>, // cmp
211 SDTCisVT<3, v4i32>, // rsrc
212 SDTCisVT<4, i32>, // vindex(VGPR)
213 SDTCisVT<5, i32>, // voffset(VGPR)
214 SDTCisVT<6, i32>, // soffset(SGPR)
215 SDTCisVT<7, i32>, // offset(imm)
216 SDTCisVT<8, i32>, // cachepolicy(imm)
217 SDTCisVT<9, i1>]>, // idxen(imm)
218 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
221 class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
223 [SDTCisPtrTy<0>, // vaddr
224 SDTCisVT<1, ty>]>, // vdata
225 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
// Packed-f16 global atomic FP add; no-return form (SDGlobalAtomicNoRtn).
228 def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
230 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
231 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
234 def SIlds : SDNode<"AMDGPUISD::LDS",
235 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
238 def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
240 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
243 def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
245 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
248 def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
250 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
253 def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
255 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
258 def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
260 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
263 def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
265 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
268 def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
269 SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
270 [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
273 //===----------------------------------------------------------------------===//
275 //===----------------------------------------------------------------------===//
277 // Returns 1 if the source arguments have modifiers, 0 if they do not.
278 // XXX - do f16 instructions?
279 class isFloatType<ValueType SrcVT> {
281 !if(!eq(SrcVT.Value, f16.Value), 1,
282 !if(!eq(SrcVT.Value, f32.Value), 1,
283 !if(!eq(SrcVT.Value, f64.Value), 1,
284 !if(!eq(SrcVT.Value, v2f16.Value), 1,
285 !if(!eq(SrcVT.Value, v4f16.Value), 1,
286 !if(!eq(SrcVT.Value, v2f32.Value), 1,
287 !if(!eq(SrcVT.Value, v2f64.Value), 1,
291 class isIntType<ValueType SrcVT> {
293 !if(!eq(SrcVT.Value, i16.Value), 1,
294 !if(!eq(SrcVT.Value, i32.Value), 1,
295 !if(!eq(SrcVT.Value, i64.Value), 1,
299 class isPackedType<ValueType SrcVT> {
301 !if(!eq(SrcVT.Value, v2i16.Value), 1,
302 !if(!eq(SrcVT.Value, v2f16.Value), 1,
303 !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
307 //===----------------------------------------------------------------------===//
308 // PatFrags for global memory operations
309 //===----------------------------------------------------------------------===//
311 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
312 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
315 defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
316 defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
317 defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
318 defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
321 } // End let AddressSpaces = ...
322 } // End foreach AddrSpace
324 def atomic_fadd_global_noret : PatFrag<
325 (ops node:$ptr, node:$value),
326 (atomic_load_fadd node:$ptr, node:$value)> {
330 let AddressSpaces = StoreAddress_global.AddrSpaces;
333 def atomic_pk_fadd_global_noret : PatFrag<
334 (ops node:$ptr, node:$value),
335 (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
337 let MemoryVT = v2f16;
339 let AddressSpaces = StoreAddress_global.AddrSpaces;
342 //===----------------------------------------------------------------------===//
343 // SDNodes PatFrags for loads/stores with a glue input.
344 // This is for SDNodes and PatFrag for local loads and stores to
345 // enable s_mov_b32 m0, -1 to be glued to the memory instructions.
347 // These mirror the regular load/store PatFrags and rely on special
348 // processing during Select() to add the glued copy.
350 //===----------------------------------------------------------------------===//
352 def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
353 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
356 def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
357 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
360 def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
365 def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
367 let IsNonExtLoad = 1;
370 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
371 (AMDGPUatomic_ld_glue node:$ptr)> {
376 def atomic_load_64_glue : PatFrag<(ops node:$ptr),
377 (AMDGPUatomic_ld_glue node:$ptr)> {
382 def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
384 let IsAnyExtLoad = 1;
387 def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
389 let IsSignExtLoad = 1;
392 def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
394 let IsZeroExtLoad = 1;
397 def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
402 def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
407 def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
412 def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
417 def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
422 def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
428 let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
429 def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
430 let IsNonExtLoad = 1;
433 let MemoryVT = i8 in {
434 def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
435 def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
436 def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
439 let MemoryVT = i16 in {
440 def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
441 def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
442 def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
445 def load_align8_local_m0 : PatFrag<(ops node:$ptr),
446 (load_local_m0 node:$ptr)> {
448 let IsNonExtLoad = 1;
449 let MinAlignment = 8;
451 def load_align16_local_m0 : PatFrag<(ops node:$ptr),
452 (load_local_m0 node:$ptr)> {
454 let IsNonExtLoad = 1;
455 let MinAlignment = 16;
460 let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
461 def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
462 (atomic_load_32_glue node:$ptr)> {
465 def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
466 (atomic_load_64_glue node:$ptr)> {
470 } // End let AddressSpaces = LoadAddress_local.AddrSpaces
473 def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
474 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
477 def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
478 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
481 def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
482 (AMDGPUst_glue node:$val, node:$ptr)> {
487 def store_glue : PatFrag<(ops node:$val, node:$ptr),
488 (unindexedstore_glue node:$val, node:$ptr)> {
490 let IsTruncStore = 0;
493 def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
494 (unindexedstore_glue node:$val, node:$ptr)> {
496 let IsTruncStore = 1;
499 def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
500 (truncstore_glue node:$val, node:$ptr)> {
505 def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
506 (truncstore_glue node:$val, node:$ptr)> {
511 let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
512 def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
513 (store_glue node:$val, node:$ptr)> {
515 let IsTruncStore = 0;
518 def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
519 (unindexedstore_glue node:$val, node:$ptr)> {
524 def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
525 (unindexedstore_glue node:$val, node:$ptr)> {
531 def store_align16_local_m0 : PatFrag <
532 (ops node:$value, node:$ptr),
533 (store_local_m0 node:$value, node:$ptr)> {
535 let IsTruncStore = 0;
536 let MinAlignment = 16;
539 def store_align8_local_m0 : PatFrag <
540 (ops node:$value, node:$ptr),
541 (store_local_m0 node:$value, node:$ptr)> {
543 let IsTruncStore = 0;
544 let MinAlignment = 8;
547 let AddressSpaces = StoreAddress_local.AddrSpaces in {
549 def atomic_store_local_32_m0 : PatFrag <
550 (ops node:$value, node:$ptr),
551 (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
555 def atomic_store_local_64_m0 : PatFrag <
556 (ops node:$value, node:$ptr),
557 (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
561 } // End let AddressSpaces = StoreAddress_local.AddrSpaces
564 def si_setcc_uniform : PatFrag <
565 (ops node:$lhs, node:$rhs, node:$cond),
566 (setcc node:$lhs, node:$rhs, node:$cond), [{
567 for (SDNode *Use : N->uses()) {
568 if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
571 unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
572 if (Reg != AMDGPU::SCC)
578 //===----------------------------------------------------------------------===//
579 // SDNodes PatFrags for d16 loads
580 //===----------------------------------------------------------------------===//
582 class LoadD16Frag <SDPatternOperator op> : PatFrag<
583 (ops node:$ptr, node:$tied_in),
584 (op node:$ptr, node:$tied_in)> {
588 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
589 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
591 def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;
593 def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
597 def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
601 def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;
603 def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
607 def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
611 } // End let AddressSpaces = ...
612 } // End foreach AddrSpace
614 def lshr_rev : PatFrag <
615 (ops node:$src1, node:$src0),
619 def ashr_rev : PatFrag <
620 (ops node:$src1, node:$src0),
624 def lshl_rev : PatFrag <
625 (ops node:$src1, node:$src0),
629 def add_ctpop : PatFrag <
630 (ops node:$src0, node:$src1),
631 (add (ctpop $src0), $src1)
634 multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
635 SDTypeProfile tc = SDTAtomic2,
639 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
640 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
643 let AddressSpaces = StoreAddress_local.AddrSpaces in {
644 defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
647 let AddressSpaces = StoreAddress_region.AddrSpaces in {
648 defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
// M0-glued atomic PatFrag instantiations.  The second argument selects the
// AMDGPUISD (1) vs. ISD (0) opcode namespace (see SIAtomicM0Glue2 above);
// the FP variants also pass SDTAtomic2_f32 and a final 0 (non-integer).
652 defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
653 defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
654 defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
655 defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
656 defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
657 defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
658 defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
659 defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
660 defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
661 defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
662 defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
663 defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
664 defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
665 defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
666 defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
668 def as_i1imm : SDNodeXForm<imm, [{
669 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
672 def as_i8imm : SDNodeXForm<imm, [{
673 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
676 def as_i16imm : SDNodeXForm<imm, [{
677 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
680 def as_i16timm : SDNodeXForm<timm, [{
681 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
684 def as_i32imm: SDNodeXForm<imm, [{
685 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
688 def as_i32timm: SDNodeXForm<timm, [{
689 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
692 def as_i64imm: SDNodeXForm<imm, [{
693 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
696 def cond_as_i32imm: SDNodeXForm<cond, [{
697 return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
700 // Copied from the AArch64 backend:
701 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
702 return CurDAG->getTargetConstant(
703 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
706 def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
707 auto FI = cast<FrameIndexSDNode>(N);
708 return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
711 // Copied from the AArch64 backend:
712 def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
713 return CurDAG->getTargetConstant(
714 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
717 class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
718 uint64_t Imm = N->getZExtValue();
719 unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
720 return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
723 def SIMM16bit : ImmLeaf <i32,
724 [{return isInt<16>(Imm);}]
727 def UIMM16bit : ImmLeaf <i32,
728 [{return isUInt<16>(Imm);}]
731 def i64imm_32bit : ImmLeaf<i64, [{
732 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
735 def InlineImm16 : ImmLeaf<i16, [{
736 return isInlineImmediate16(Imm);
739 def InlineImm32 : ImmLeaf<i32, [{
740 return isInlineImmediate32(Imm);
743 def InlineImm64 : ImmLeaf<i64, [{
744 return isInlineImmediate64(Imm);
747 def InlineImmFP32 : FPImmLeaf<f32, [{
748 return isInlineImmediate(Imm);
751 def InlineImmFP64 : FPImmLeaf<f64, [{
752 return isInlineImmediate(Imm);
756 class VGPRImm <dag frag> : PatLeaf<frag, [{
760 def NegateImm : SDNodeXForm<imm, [{
761 return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
764 // TODO: When FP inline imm values work?
765 def NegSubInlineConst32 : ImmLeaf<i32, [{
766 return Imm < -16 && Imm >= -64;
769 def NegSubInlineConst16 : ImmLeaf<i16, [{
770 return Imm < -16 && Imm >= -64;
773 def ShiftAmt32Imm : ImmLeaf <i32, [{
777 def getNegV2I16Imm : SDNodeXForm<build_vector, [{
778 return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
781 def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
782 assert(N->getNumOperands() == 2);
783 assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
784 SDValue Src0 = N->getOperand(0);
785 SDValue Src1 = N->getOperand(1);
787 return isNegInlineImmediate(Src0.getNode());
789 return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
790 (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
793 //===----------------------------------------------------------------------===//
795 //===----------------------------------------------------------------------===//
797 def SoppBrTarget : AsmOperandClass {
798 let Name = "SoppBrTarget";
799 let ParserMethod = "parseSOppBrTarget";
802 def sopp_brtarget : Operand<OtherVT> {
803 let EncoderMethod = "getSOPPBrEncoding";
804 let DecoderMethod = "decodeSoppBrTarget";
805 let OperandType = "OPERAND_PCREL";
806 let ParserMatchClass = SoppBrTarget;
// Pointer-typed operand (used for global addresses, per the name).
809 def si_ga : Operand<iPTR>;
811 def InterpSlotMatchClass : AsmOperandClass {
812 let Name = "InterpSlot";
813 let PredicateMethod = "isInterpSlot";
814 let ParserMethod = "parseInterpSlot";
815 let RenderMethod = "addImmOperands";
818 def InterpSlot : Operand<i32> {
819 let PrintMethod = "printInterpSlot";
820 let ParserMatchClass = InterpSlotMatchClass;
821 let OperandType = "OPERAND_IMMEDIATE";
824 def AttrMatchClass : AsmOperandClass {
826 let PredicateMethod = "isInterpAttr";
827 let ParserMethod = "parseInterpAttr";
828 let RenderMethod = "addImmOperands";
831 // It appears to be necessary to create a separate operand for this to
832 // be able to parse attr<num> with no space.
833 def Attr : Operand<i32> {
834 let PrintMethod = "printInterpAttr";
835 let ParserMatchClass = AttrMatchClass;
836 let OperandType = "OPERAND_IMMEDIATE";
839 def AttrChanMatchClass : AsmOperandClass {
840 let Name = "AttrChan";
841 let PredicateMethod = "isAttrChan";
842 let RenderMethod = "addImmOperands";
845 def AttrChan : Operand<i32> {
846 let PrintMethod = "printInterpAttrChan";
847 let ParserMatchClass = AttrChanMatchClass;
848 let OperandType = "OPERAND_IMMEDIATE";
851 def SendMsgMatchClass : AsmOperandClass {
852 let Name = "SendMsg";
853 let PredicateMethod = "isSendMsg";
854 let ParserMethod = "parseSendMsgOp";
855 let RenderMethod = "addImmOperands";
858 def SwizzleMatchClass : AsmOperandClass {
859 let Name = "Swizzle";
860 let PredicateMethod = "isSwizzle";
861 let ParserMethod = "parseSwizzleOp";
862 let RenderMethod = "addImmOperands";
866 def EndpgmMatchClass : AsmOperandClass {
867 let Name = "EndpgmImm";
868 let PredicateMethod = "isEndpgm";
869 let ParserMethod = "parseEndpgmOp";
870 let RenderMethod = "addImmOperands";
874 def ExpTgtMatchClass : AsmOperandClass {
876 let PredicateMethod = "isExpTgt";
877 let ParserMethod = "parseExpTgt";
878 let RenderMethod = "printExpTgt";
881 def SWaitMatchClass : AsmOperandClass {
882 let Name = "SWaitCnt";
883 let RenderMethod = "addImmOperands";
884 let ParserMethod = "parseSWaitCntOps";
887 def VReg32OrOffClass : AsmOperandClass {
888 let Name = "VReg32OrOff";
889 let ParserMethod = "parseVReg32OrOff";
892 let OperandType = "OPERAND_IMMEDIATE" in {
893 def SendMsgImm : Operand<i32> {
894 let PrintMethod = "printSendMsg";
895 let ParserMatchClass = SendMsgMatchClass;
898 def SwizzleImm : Operand<i16> {
899 let PrintMethod = "printSwizzle";
900 let ParserMatchClass = SwizzleMatchClass;
903 def EndpgmImm : Operand<i16> {
904 let PrintMethod = "printEndpgm";
905 let ParserMatchClass = EndpgmMatchClass;
908 def WAIT_FLAG : Operand <i32> {
909 let ParserMatchClass = SWaitMatchClass;
910 let PrintMethod = "printWaitFlag";
912 } // End OperandType = "OPERAND_IMMEDIATE"
914 include "SIInstrFormats.td"
915 include "VIInstrFormats.td"
917 def BoolReg : AsmOperandClass {
918 let Name = "BoolReg";
919 let ParserMethod = "parseBoolReg";
920 let RenderMethod = "addRegOperands";
923 class BoolRC : RegisterOperand<SReg_1> {
924 let ParserMatchClass = BoolReg;
925 let DecoderMethod = "decodeBoolReg";
928 def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
929 let ParserMatchClass = BoolReg;
930 let DecoderMethod = "decodeBoolReg";
933 def VOPDstS64orS32 : BoolRC {
934 let PrintMethod = "printVOPDst";
937 // SCSrc_i1 is the operand for pseudo instructions only.
938 // Boolean immediates shall not be exposed to codegen instructions.
939 def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
940 let OperandNamespace = "AMDGPU";
941 let OperandType = "OPERAND_REG_IMM_INT32";
942 let ParserMatchClass = BoolReg;
943 let DecoderMethod = "decodeBoolReg";
946 // ===----------------------------------------------------------------------===//
947 // ExpSrc* Special cases for exp src operands which are printed as
948 // "off" depending on en operand.
949 // ===----------------------------------------------------------------------===//
951 def ExpSrc0 : RegisterOperand<VGPR_32> {
952 let PrintMethod = "printExpSrc0";
953 let ParserMatchClass = VReg32OrOffClass;
956 def ExpSrc1 : RegisterOperand<VGPR_32> {
957 let PrintMethod = "printExpSrc1";
958 let ParserMatchClass = VReg32OrOffClass;
961 def ExpSrc2 : RegisterOperand<VGPR_32> {
962 let PrintMethod = "printExpSrc2";
963 let ParserMatchClass = VReg32OrOffClass;
966 def ExpSrc3 : RegisterOperand<VGPR_32> {
967 let PrintMethod = "printExpSrc3";
968 let ParserMatchClass = VReg32OrOffClass;
971 class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
972 let OperandNamespace = "AMDGPU";
973 string Type = !if(isFloatType<vt>.ret, "FP", "INT");
974 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
975 let DecoderMethod = "decodeSDWASrc"#vt.Size;
976 let EncoderMethod = "getSDWASrcEncoding";
// SDWA source operand instantiations for each supported element type; the
// SDWASrc class (above) derives the operand type and decoder from vt.
979 def SDWASrc_i32 : SDWASrc<i32>;
980 def SDWASrc_i16 : SDWASrc<i16>;
981 def SDWASrc_f32 : SDWASrc<f32>;
982 def SDWASrc_f16 : SDWASrc<f16>;
984 def SDWAVopcDst : BoolRC {
985 let OperandNamespace = "AMDGPU";
986 let OperandType = "OPERAND_SDWA_VOPC_DST";
987 let EncoderMethod = "getSDWAVopcDstEncoding";
988 let DecoderMethod = "decodeSDWAVopcDst";
989 let PrintMethod = "printVOPDst";
992 class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
993 let Name = "Imm"#CName;
994 let PredicateMethod = "is"#CName;
995 let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
996 let RenderMethod = "addImmOperands";
997 let IsOptional = Optional;
998 let DefaultMethod = !if(Optional, "default"#CName, ?);
1001 class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
1002 let PrintMethod = "print"#Name;
1003 let ParserMatchClass = MatchClass;
1006 class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
1007 OperandWithDefaultOps<i1, (ops (i1 0))> {
1008 let PrintMethod = "print"#Name;
1009 let ParserMatchClass = MatchClass;
1012 class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
1013 let PrintMethod = "print"#Name;
1014 let ParserMatchClass = MatchClass;
1017 class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
1018 let PrintMethod = "print"#Name;
1019 let ParserMatchClass = MatchClass;
1022 class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
1023 let PrintMethod = "print"#Name;
1024 let ParserMatchClass = MatchClass;
1027 class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
1028 OperandWithDefaultOps<i32, (ops (i32 0))> {
1029 let PrintMethod = "print"#Name;
1030 let ParserMatchClass = MatchClass;
1033 class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
1034 OperandWithDefaultOps<i32, (ops (i32 0))> {
1035 let PrintMethod = "print"#Name;
1036 let ParserMatchClass = MatchClass;
1039 let OperandType = "OPERAND_IMMEDIATE" in {
// Named immediate operands: each pairs a print/parse method name with a
// NamedMatchClass of the same name.  "Bit" operands are i1 flags;
// U8/U16/U32 give the immediate width.  A second NamedMatchClass argument
// of 0 marks the operand as non-optional (see NamedMatchClass above).
1041 def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
1042 def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
1043 def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
1045 def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
1046 def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
1047 def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
1048 def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
1050 def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;
1052 def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
1053 def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;
1055 // We need to make the cases with a default of 0 distinct from no
1056 // default to help deal with some cases where the operand appears
1057 // before a mandatory operand.
1058 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
1059 def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
1060 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
1062 def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
1063 def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
1064 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
1065 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
1066 def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
1067 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
1068 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
1069 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
1070 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
1071 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
1072 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
1073 def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
1075 def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
1077 def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
1078 def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
1080 def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
1082 def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
1083 def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
1084 def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
1085 def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
1086 def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
1088 def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
1089 def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
1090 def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
1091 def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
1093 def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
1094 def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
1095 def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
1096 def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
1098 def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
1099 def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
1100 def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
1102 def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
1104 def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
1108 } // End OperandType = "OPERAND_IMMEDIATE"
// Assembler match class for "KImm" (literal-constant) FP immediates of the
// given bit size; predicate/render method names are derived from the size.
1110 class KImmMatchClass<int size> : AsmOperandClass {
1111 let Name = "KImmFP"#size;
1112 let PredicateMethod = "isKImmFP"#size;
1113 let ParserMethod = "parseImm";
1114 let RenderMethod = "addKImmFP"#size#"Operands";
// Operand wrapper for a KImm of value type vt; looks up the match class
// defined from KImmMatchClass by name ("KImmFP"#Size#"MatchClass").
1117 class kimmOperand<ValueType vt> : Operand<vt> {
1118 let OperandNamespace = "AMDGPU";
1119 let OperandType = "OPERAND_KIMM"#vt.Size;
1120 let PrintMethod = "printU"#vt.Size#"ImmOperand";
1121 let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
1124 // 32-bit VALU immediate operand that uses the constant bus.
1125 def KImmFP32MatchClass : KImmMatchClass<32>;
1126 def f32kimm : kimmOperand<i32>;
1128 // 32-bit VALU immediate operand with a 16-bit value that uses the
1130 def KImmFP16MatchClass : KImmMatchClass<16>;
1131 def f16kimm : kimmOperand<i16>;
// Match class for a register-or-immediate source that may carry FP input
// modifiers (neg/abs) of the given operand bit size.
1133 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
1134 let Name = "RegOrImmWithFP"#opSize#"InputMods";
1135 let ParserMethod = "parseRegOrImmWithFPInputMods";
1136 let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
1139 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
1140 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
1141 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
// Base class for all "modifier" operands: a separate i32 operand that encodes
// the source modifiers alongside the actual source operand.
1143 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
1144 let OperandNamespace = "AMDGPU";
1145 let OperandType = "OPERAND_INPUT_MODS";
1146 let ParserMatchClass = matchClass;
// FP flavor: the printer emits the source operand together with its FP mods.
1149 class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
1150 let PrintMethod = "printOperandAndFPInputMods";
1153 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
1154 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
1155 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
// Match class for a register-or-immediate source with integer input
// modifiers (e.g. sext) of the given operand bit size.
1157 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
1158 let Name = "RegOrImmWithInt"#opSize#"InputMods";
1159 let ParserMethod = "parseRegOrImmWithIntInputMods";
1160 let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
1162 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
1163 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
// Integer flavor of InputMods; prints operand plus its integer mods.
1165 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
1166 let PrintMethod = "printOperandAndIntInputMods";
1168 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
1169 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
// Modifier operand used for op_sel-only sources: parses/validates as a plain
// register-or-immediate (no neg/abs syntax).
1171 class OpSelModsMatchClass : AsmOperandClass {
1172 let Name = "OpSelMods";
1173 let ParserMethod = "parseRegOrImm";
1174 let PredicateMethod = "isRegOrImm";
1177 def IntOpSelModsMatchClass : OpSelModsMatchClass;
1178 def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
// SDWA-specific FP source-with-modifiers match class; the predicate checks
// SDWA operand legality ("isSDWAFP"#opSize#"Operand") rather than the plain
// reg-or-imm predicate used by the VOP3 classes above.
1180 class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1181 let Name = "SDWAWithFP"#opSize#"InputMods";
1182 let ParserMethod = "parseRegOrImmWithFPInputMods";
1183 let PredicateMethod = "isSDWAFP"#opSize#"Operand";
1186 def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
1187 def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
1189 class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
1190 InputMods <matchClass> {
1191 let PrintMethod = "printOperandAndFPInputMods";
1194 def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
1195 def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
// VGPR-only (no immediates) FP source with modifiers, used by DPP where
// sources must live in 32-bit VGPRs.
1197 def FPVRegInputModsMatchClass : AsmOperandClass {
1198 let Name = "VRegWithFPInputMods";
1199 let ParserMethod = "parseRegWithFPInputMods";
1200 let PredicateMethod = "isVReg32";
1203 def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
1204 let PrintMethod = "printOperandAndFPInputMods";
// SDWA-specific integer source-with-modifiers match class; mirrors the FP
// SDWA classes above but with integer predicates/printers.
1207 class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1208 let Name = "SDWAWithInt"#opSize#"InputMods";
1209 let ParserMethod = "parseRegOrImmWithIntInputMods";
1210 let PredicateMethod = "isSDWAInt"#opSize#"Operand";
1213 def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
1214 def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
1216 class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
1217 InputMods <matchClass> {
1218 let PrintMethod = "printOperandAndIntInputMods";
1221 def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
1222 def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
// VGPR-only integer source with modifiers, the DPP counterpart of
// FPVRegInputMods above.
1224 def IntVRegInputModsMatchClass : AsmOperandClass {
1225 let Name = "VRegWithIntInputMods";
1226 let ParserMethod = "parseRegWithIntInputMods";
1227 let PredicateMethod = "isVReg32";
1230 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
1231 let PrintMethod = "printOperandAndIntInputMods";
// Packed-math (VOP3P) source modifiers.  Currently parsed/validated as a
// plain reg-or-imm; the commented-out lines preserve the intended dedicated
// predicate/printer names for when packed-modifier syntax is implemented.
1234 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
1235 let Name = "PackedFP"#opSize#"InputMods";
1236 let ParserMethod = "parseRegOrImm";
1237 let PredicateMethod = "isRegOrImm";
1238 // let PredicateMethod = "isPackedFP"#opSize#"InputMods";
1241 class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
1242 let Name = "PackedInt"#opSize#"InputMods";
1243 let ParserMethod = "parseRegOrImm";
1244 let PredicateMethod = "isRegOrImm";
1245 // let PredicateMethod = "isPackedInt"#opSize#"InputMods";
1248 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
1249 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1251 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1252 // let PrintMethod = "printPackedFPInputMods";
1255 class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
1256 //let PrintMethod = "printPackedIntInputMods";
1259 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
1260 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1262 //===----------------------------------------------------------------------===//
1264 //===----------------------------------------------------------------------===//
// ComplexPatterns: each names a C++ selection routine in the AMDGPU ISel DAG
// matcher; the integer is the number of operand results the selector yields.
1266 def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
1267 def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
1269 def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
// VOP3 source selection with input modifiers; the "0" variants additionally
// produce clamp (and omod) operands for src0.
1271 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
1272 def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
1273 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
1274 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
1275 // VOP3Mods, but the input source is known to never be NaN.
1276 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
1277 // VOP3Mods, but only allowed for f32 operands.
1278 def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
1280 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
// Packed-math (VOP3P) and op_sel source selection.
1282 def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
1283 def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
1285 def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
1286 def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
1288 def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
1289 def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;
1291 def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
1294 def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">;
1296 //===----------------------------------------------------------------------===//
1297 // SI assembler operands
1298 //===----------------------------------------------------------------------===//
1303 int FLAT_SCR = 0x68;
1306 // This should be kept in sync with SISrcMods enum
1330 int LLVM_DEBUG_TRAP = 3;
1346 int FLAT_SCR_LO = 20;
1347 int FLAT_SCR_HI = 21;
1348 int XNACK_MASK = 22;
1349 int POPS_PACKER = 25;
// Builds the s_getreg/s_setreg immediate: hwreg id in the low bits, bit
// offset at bit 6, and (size - 1) at bit 11, matching the hardware encoding.
1352 class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
1354 !or(!shl(Offset, 6),
1355 !shl(!add(Size, -1), 11)));
1358 //===----------------------------------------------------------------------===//
1360 // SI Instruction multiclass helpers.
1362 // Instructions with _32 take 32-bit operands.
1363 // Instructions with _64 take 64-bit operands.
1365 // VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
1366 // encoding is the standard encoding, but instruction that make use of
1367 // any of the instruction modifiers must use the 64-bit encoding.
1369 // Instructions with _e32 use the 32-bit encoding.
1370 // Instructions with _e64 use the 64-bit encoding.
1372 //===----------------------------------------------------------------------===//
// Mixin tagging an MC instruction with the pseudo-instruction name it lowers
// from and its SIEncodingFamily subtarget, used to map pseudo -> real opcode.
1374 class SIMCInstr <string pseudo, int subtarget> {
1375 string PseudoInstr = pseudo;
1376 int Subtarget = subtarget;
1379 //===----------------------------------------------------------------------===//
1381 //===----------------------------------------------------------------------===//
// Shared skeleton for EXP (export) instructions.  'done' appends " done" to
// the mnemonic; the pattern matches the export SDNode on the four f32
// sources plus tgt/en/compr/vm immediates.
1383 class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
1386 ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
1387 exp_vm:$vm, exp_compr:$compr, i8imm:$en),
1388 "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
1389 [(node (i8 timm:$tgt), (i8 timm:$en),
1390 f32:$src0, f32:$src1, f32:$src2, f32:$src3,
1391 (i1 timm:$compr), (i1 timm:$vm))]> {
1392 let AsmMatchConverter = "cvtExp";
1395 // Split EXP instruction into EXP and EXP_DONE so we can set
1396 // mayLoad for done=1.
// Emits the codegen pseudo ("") plus per-encoding real instructions
// (_si / _vi / _gfx10), each tied back to the pseudo via SIMCInstr.
1397 multiclass EXP_m<bit done, SDPatternOperator node> {
1398 let mayLoad = done, DisableWQM = 1 in {
1399 let isPseudo = 1, isCodeGenOnly = 1 in {
1400 def "" : EXP_Helper<done, node>,
1401 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
1404 let done = done in {
1405 def _si : EXP_Helper<done>,
1406 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
1408 let AssemblerPredicate = isGFX6GFX7;
1409 let DecoderNamespace = "GFX6GFX7";
1410 let DisableDecoder = DisableSIDecoder;
1413 def _vi : EXP_Helper<done>,
1414 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
1416 let AssemblerPredicate = isGFX8GFX9;
1417 let DecoderNamespace = "GFX8";
1418 let DisableDecoder = DisableVIDecoder;
1421 def _gfx10 : EXP_Helper<done>,
1422 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
1424 let AssemblerPredicate = isGFX10Plus;
1425 let DecoderNamespace = "GFX10";
// NOTE(review): _gfx10 reuses DisableSIDecoder (there is no GFX10-specific
// disable flag visible here) -- confirm this is intentional.
1426 let DisableDecoder = DisableSIDecoder;
1432 //===----------------------------------------------------------------------===//
1433 // Vector ALU classes
1434 //===----------------------------------------------------------------------===//
// Counts source operands by checking which Src VTs are 'untyped':
// untyped Src0 -> 0, untyped Src1 -> 1 (VOP1), untyped Src2 -> 2 (VOP2),
// otherwise 3 (VOP3).
1436 class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
1438 !if (!eq(Src0.Value, untyped.Value), 0,
1439 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
1440 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
1444 // Returns the register class to use for the destination of VOP[123C]
1445 // instructions for the given VT.
1446 class getVALUDstForVT<ValueType VT> {
1447 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1448 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1449 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1450 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
1451 VOPDstS64orS32)))); // else VT == i1
1454 // Returns the register class to use for the destination of VOP[12C]
1455 // instructions with SDWA extension
1456 class getSDWADstForVT<ValueType VT> {
1457 RegisterOperand ret = !if(!eq(VT.Size, 1),
1458 SDWAVopcDst, // VOPC
1459 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
1462 // Returns the register class to use for source 0 of VOP[12C]
1463 // instructions for the given VT.
// Branches first on FP vs integer type, then on size / specific VT to pick
// the matching source operand class.
1464 class getVOPSrc0ForVT<ValueType VT> {
1465 bit isFP = isFloatType<VT>.ret;
1467 RegisterOperand ret =
1469 !if(!eq(VT.Size, 64),
1471 !if(!eq(VT.Value, f16.Value),
1473 !if(!eq(VT.Value, v2f16.Value),
1475 !if(!eq(VT.Value, v4f16.Value),
1482 !if(!eq(VT.Size, 64),
1484 !if(!eq(VT.Value, i16.Value),
1486 !if(!eq(VT.Value, v2i16.Value),
1495 // Returns the vreg register class to use for source operand given VT
// 48-bit types are rounded up to a 64-bit VGPR pair.
1496 class getVregSrcForVT<ValueType VT> {
1497 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
1498 !if(!eq(VT.Size, 96), VReg_96,
1499 !if(!eq(VT.Size, 64), VReg_64,
1500 !if(!eq(VT.Size, 48), VReg_64,
// SDWA sources: pick 16- vs 32-bit variant by size, FP vs int by type.
1504 class getSDWASrcForVT <ValueType VT> {
1505 bit isFP = isFloatType<VT>.ret;
1506 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
1507 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
1508 RegisterOperand ret = !if(isFP, retFlt, retInt);
1511 // Returns the register class to use for sources of VOP3 instructions for the
// Selection cascades over size (128/64), then i1, then FP scalar/vector
// types, then integer types.
1513 class getVOP3SrcForVT<ValueType VT> {
1514 bit isFP = isFloatType<VT>.ret;
1515 RegisterOperand ret =
1516 !if(!eq(VT.Size, 128),
1518 !if(!eq(VT.Size, 64),
1522 !if(!eq(VT.Value, i1.Value),
1525 !if(!eq(VT.Value, f16.Value),
1527 !if(!eq(VT.Value, v2f16.Value),
1529 !if(!eq(VT.Value, v4f16.Value),
1535 !if(!eq(VT.Value, i16.Value),
1537 !if(!eq(VT.Value, v2i16.Value),
1548 // Float or packed int
// True for the types whose sources accept neg/abs modifiers.
1549 class isModifierType<ValueType SrcVT> {
1551 !if(!eq(SrcVT.Value, f16.Value), 1,
1552 !if(!eq(SrcVT.Value, f32.Value), 1,
1553 !if(!eq(SrcVT.Value, f64.Value), 1,
1554 !if(!eq(SrcVT.Value, v2f16.Value), 1,
1555 !if(!eq(SrcVT.Value, v2i16.Value), 1,
1559 // Return type of input modifiers operand for specified input operand
// EnableF32SrcMods forces FP32InputMods even for non-FP 32-bit sources.
1560 class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
1561 bit isFP = isFloatType<VT>.ret;
1562 bit isPacked = isPackedType<VT>.ret;
1563 Operand ret = !if(!eq(VT.Size, 64),
1564 !if(isFP, FP64InputMods, Int64InputMods),
1566 !if(!eq(VT.Value, f16.Value),
1570 !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
// op_sel-only modifier operand: FP16 sources keep full FP mods, everything
// else gets the plain IntOpSelMods operand.
1574 class getOpSelMod <ValueType VT> {
1575 Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
1578 // Return type of input modifiers operand specified input operand for DPP
// DPP sources must be VGPRs, so select the VReg-only modifier operands.
1579 class getSrcModExt <ValueType VT> {
1580 bit isFP = isFloatType<VT>.ret;
1581 Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
1584 // Return type of input modifiers operand specified input operand for SDWA
// f16/f32 get FP SDWA mods, i16 gets Int16, everything else falls back to
// Int32 SDWA mods.
1585 class getSrcModSDWA <ValueType VT> {
1586 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
1587 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
1588 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
1589 Int32SDWAInputMods)));
1592 // Returns the input arguments for VOP[12C] instructions for the given SrcVT.
// One source for VOP1, two for VOP2; no modifier operands in the 32-bit
// encoding.
1593 class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
1594 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
1595 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
1599 // Returns the input arguments for VOP3 instructions for the given SrcVT.
// The (ins ...) dag is chosen by NumSrcArgs, then by whether the sources
// carry modifiers (HasModifiers / HasSrc2Mods), whether an integer clamp
// operand is present (HasIntClamp), and whether omod exists (HasOMod).
// clampmod0/omod0 (default-0 variants) are used so they may be omitted in
// assembly even when followed by other operands.
1600 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
1601 RegisterOperand Src2RC, int NumSrcArgs,
1602 bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1603 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1606 !if (!eq(NumSrcArgs, 0),
1607 // VOP1 without input operands (V_NOP, V_CLREXCP)
1610 !if (!eq(NumSrcArgs, 1),
1611 !if (!eq(HasModifiers, 1),
1612 // VOP1 with modifiers
1613 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1614 clampmod0:$clamp, omod0:$omod)
1616 // VOP1 without modifiers
1617 !if (!eq(HasIntClamp, 1),
1618 (ins Src0RC:$src0, clampmod0:$clamp),
1621 !if (!eq(NumSrcArgs, 2),
1622 !if (!eq(HasModifiers, 1),
1623 // VOP 2 with modifiers
1624 !if( !eq(HasOMod, 1),
1625 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1626 Src1Mod:$src1_modifiers, Src1RC:$src1,
1627 clampmod0:$clamp, omod0:$omod),
1628 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1629 Src1Mod:$src1_modifiers, Src1RC:$src1,
1632 // VOP2 without modifiers
1633 !if (!eq(HasIntClamp, 1),
1634 (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
1635 (ins Src0RC:$src0, Src1RC:$src1))
1638 /* NumSrcArgs == 3 */,
1639 !if (!eq(HasModifiers, 1),
1640 !if (!eq(HasSrc2Mods, 1),
1641 // VOP3 with modifiers
1642 !if (!eq(HasOMod, 1),
1643 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1644 Src1Mod:$src1_modifiers, Src1RC:$src1,
1645 Src2Mod:$src2_modifiers, Src2RC:$src2,
1646 clampmod0:$clamp, omod0:$omod),
1647 !if (!eq(HasIntClamp, 1),
1648 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1649 Src1Mod:$src1_modifiers, Src1RC:$src1,
1650 Src2Mod:$src2_modifiers, Src2RC:$src2,
1652 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1653 Src1Mod:$src1_modifiers, Src1RC:$src1,
1654 Src2Mod:$src2_modifiers, Src2RC:$src2))),
1655 // VOP3 with modifiers except src2
1656 !if (!eq(HasOMod, 1),
1657 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1658 Src1Mod:$src1_modifiers, Src1RC:$src1,
1659 Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
1660 !if (!eq(HasIntClamp, 1),
1661 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1662 Src1Mod:$src1_modifiers, Src1RC:$src1,
1663 Src2RC:$src2, clampmod0:$clamp),
1664 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1665 Src1Mod:$src1_modifiers, Src1RC:$src1,
1668 // VOP3 without modifiers
1669 !if (!eq(HasIntClamp, 1),
1670 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
1671 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
1675 /// XXX - src1 may only allow VGPRs?
1677 // The modifiers (except clamp) are dummy operands for the benefit of
1678 // printing and parsing. They defer their values to looking at the
1679 // srcN_modifiers for what to print.
// Builds the (ins ...) dag for VOP3P packed-math instructions; two- and
// three-source forms both carry op_sel/op_sel_hi/neg_lo/neg_hi.
1680 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
1681 RegisterOperand Src2RC, int NumSrcArgs,
1683 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1684 dag ret = !if (!eq(NumSrcArgs, 2),
1686 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1687 Src1Mod:$src1_modifiers, Src1RC:$src1,
1689 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1690 neg_lo:$neg_lo, neg_hi:$neg_hi),
1691 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1692 Src1Mod:$src1_modifiers, Src1RC:$src1,
1693 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1694 neg_lo:$neg_lo, neg_hi:$neg_hi)),
1695 // else NumSrcArgs == 3
1697 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1698 Src1Mod:$src1_modifiers, Src1RC:$src1,
1699 Src2Mod:$src2_modifiers, Src2RC:$src2,
1701 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1702 neg_lo:$neg_lo, neg_hi:$neg_hi),
1703 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1704 Src1Mod:$src1_modifiers, Src1RC:$src1,
1705 Src2Mod:$src2_modifiers, Src2RC:$src2,
1706 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1707 neg_lo:$neg_lo, neg_hi:$neg_hi))
// Builds the (ins ...) dag for VOP3 instructions that use op_sel (but not
// the full packed neg_lo/neg_hi set of VOP3P).
1711 class getInsVOP3OpSel <RegisterOperand Src0RC,
1712 RegisterOperand Src1RC,
1713 RegisterOperand Src2RC,
1719 dag ret = !if (!eq(NumSrcArgs, 2),
1721 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1722 Src1Mod:$src1_modifiers, Src1RC:$src1,
1725 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1726 Src1Mod:$src1_modifiers, Src1RC:$src1,
1728 // else NumSrcArgs == 3
1730 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1731 Src1Mod:$src1_modifiers, Src1RC:$src1,
1732 Src2Mod:$src2_modifiers, Src2RC:$src2,
1735 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1736 Src1Mod:$src1_modifiers, Src1RC:$src1,
1737 Src2Mod:$src2_modifiers, Src2RC:$src2,
// Builds the (ins ...) dag for DPP instructions.  Every non-nullary form
// takes the previous dest value ($old, needed for bound_ctrl/row masking)
// plus the DPP control operands; sources are VGPR-only.
1742 class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1743 int NumSrcArgs, bit HasModifiers,
1744 Operand Src0Mod, Operand Src1Mod> {
1746 dag ret = !if (!eq(NumSrcArgs, 0),
1747 // VOP1 without input operands (V_NOP)
1748 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1749 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
1750 !if (!eq(NumSrcArgs, 1),
1751 !if (!eq(HasModifiers, 1),
1752 // VOP1_DPP with modifiers
1753 (ins DstRC:$old, Src0Mod:$src0_modifiers,
1754 Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1755 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1757 // VOP1_DPP without modifiers
1758 (ins DstRC:$old, Src0RC:$src0,
1759 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1760 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1762 /* NumSrcArgs == 2 */,
1763 !if (!eq(HasModifiers, 1),
1764 // VOP2_DPP with modifiers
1766 Src0Mod:$src0_modifiers, Src0RC:$src0,
1767 Src1Mod:$src1_modifiers, Src1RC:$src1,
1768 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1769 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1771 // VOP2_DPP without modifiers
1773 Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
1774 row_mask:$row_mask, bank_mask:$bank_mask,
1775 bound_ctrl:$bound_ctrl)
// DPP16: the plain DPP operand list plus the FI (fetch-invalid) operand,
// appended via !con.
1779 class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1780 int NumSrcArgs, bit HasModifiers,
1781 Operand Src0Mod, Operand Src1Mod> {
1782 dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
1783 HasModifiers, Src0Mod, Src1Mod>.ret,
// DPP8 replaces the dpp_ctrl/row_mask/bank_mask/bound_ctrl set with a single
// dpp8 lane-select operand plus FI; otherwise mirrors getInsDPP's structure.
1787 class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1788 int NumSrcArgs, bit HasModifiers,
1789 Operand Src0Mod, Operand Src1Mod> {
1790 dag ret = !if (!eq(NumSrcArgs, 0),
1791 // VOP1 without input operands (V_NOP)
1792 (ins dpp8:$dpp8, FI:$fi),
1793 !if (!eq(NumSrcArgs, 1),
1794 !if (!eq(HasModifiers, 1),
1795 // VOP1_DPP with modifiers
1796 (ins DstRC:$old, Src0Mod:$src0_modifiers,
1797 Src0RC:$src0, dpp8:$dpp8, FI:$fi)
1799 // VOP1_DPP without modifiers
1800 (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
1802 /* NumSrcArgs == 2 */,
1803 !if (!eq(HasModifiers, 1),
1804 // VOP2_DPP with modifiers
1806 Src0Mod:$src0_modifiers, Src0RC:$src0,
1807 Src1Mod:$src1_modifiers, Src1RC:$src1,
1810 // VOP2_DPP without modifiers
1812 Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
// Builds the (ins ...) dag for SDWA instructions.  Sources always carry SDWA
// modifier operands; dst_sel/dst_unused are present only when there is a
// vector dst (DstVT.Size != 1); VOPC (1-bit dst) gets only the src selects.
// Plain clampmod/omod (no default) are used here, unlike getIns64.
1818 class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
1819 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
1822 dag ret = !if(!eq(NumSrcArgs, 0),
1823 // VOP1 without input operands (V_NOP)
1825 !if(!eq(NumSrcArgs, 1),
1827 !if(!eq(HasSDWAOMod, 0),
1828 // VOP1_SDWA without omod
1829 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1831 dst_sel:$dst_sel, dst_unused:$dst_unused,
1832 src0_sel:$src0_sel),
1833 // VOP1_SDWA with omod
1834 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1835 clampmod:$clamp, omod:$omod,
1836 dst_sel:$dst_sel, dst_unused:$dst_unused,
1837 src0_sel:$src0_sel)),
1838 !if(!eq(NumSrcArgs, 2),
1839 !if(!eq(DstVT.Size, 1),
1841 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1842 Src1Mod:$src1_modifiers, Src1RC:$src1,
1843 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
1845 !if(!eq(HasSDWAOMod, 0),
1846 // VOP2_SDWA without omod
1847 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1848 Src1Mod:$src1_modifiers, Src1RC:$src1,
1850 dst_sel:$dst_sel, dst_unused:$dst_unused,
1851 src0_sel:$src0_sel, src1_sel:$src1_sel),
1852 // VOP2_SDWA with omod
1853 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1854 Src1Mod:$src1_modifiers, Src1RC:$src1,
1855 clampmod:$clamp, omod:$omod,
1856 dst_sel:$dst_sel, dst_unused:$dst_unused,
1857 src0_sel:$src0_sel, src1_sel:$src1_sel))),
1858 (ins)/* endif */)));
1861 // Outs for DPP and SDWA
// 1-bit (VOPC) results have no explicit dst operand in the extended
// encodings; otherwise the dst goes in $vdst.
1862 class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
1863 dag ret = !if(HasDst,
1864 !if(!eq(DstVT.Size, 1),
1865 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
1866 (outs DstRCExt:$vdst)),
// SDWA variant: VOPC results are an explicit scalar $sdst instead of being
// dropped.
1871 class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
1872 dag ret = !if(HasDst,
1873 !if(!eq(DstVT.Size, 1),
1874 (outs DstRCSDWA:$sdst),
1875 (outs DstRCSDWA:$vdst)),
1879 // Returns the assembly string for the inputs and outputs of a VOP[12C]
1880 // instruction. This does not add the _e32 suffix, so it can be reused
1882 class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1883 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1884 string src0 = ", $src0";
1885 string src1 = ", $src1";
1886 string src2 = ", $src2";
1887 string ret = !if(HasDst, dst, "") #
1888 !if(!eq(NumSrcArgs, 1), src0, "") #
1889 !if(!eq(NumSrcArgs, 2), src0#src1, "") #
1890 !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
1893 // Returns the assembly string for the inputs and outputs of a VOP3
// Without modifiers this degrades to the 32-bit asm string (plus an integer
// clamp if present); with modifiers each source prints via $srcN_modifiers.
1895 class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
1896 bit HasOMod, ValueType DstVT = i32> {
1897 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1898 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1899 string src1 = !if(!eq(NumSrcArgs, 1), "",
1900 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1901 " $src1_modifiers,"));
1902 string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
1903 string iclamp = !if(HasIntClamp, "$clamp", "");
1905 !if(!eq(HasModifiers, 0),
1906 getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
1907 dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
1910 // Returns the assembly string for the inputs and outputs of a VOP3P
1912 class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
1913 bit HasClamp, ValueType DstVT = i32> {
1914 string dst = " $vdst";
1915 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1916 string src1 = !if(!eq(NumSrcArgs, 1), "",
1917 !if(!eq(NumSrcArgs, 2), " $src1",
1919 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1921 string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
1922 string clamp = !if(HasClamp, "$clamp", "");
1924 // Each modifier is printed as an array of bits for each operand, so
1925 // all operands are printed as part of src0_modifiers.
1926 string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
// Asm string for VOP3 with op_sel: each source prints either plain ($srcN)
// or with modifiers ($srcN_modifiers) depending on its SrcNHasMods flag.
1929 class getAsmVOP3OpSel <int NumSrcArgs,
1934 string dst = " $vdst";
1936 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1937 string isrc1 = !if(!eq(NumSrcArgs, 1), "",
1938 !if(!eq(NumSrcArgs, 2), " $src1",
1940 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1942 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1943 string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
1944 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1945 " $src1_modifiers,"));
1946 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
1948 string src0 = !if(Src0HasMods, fsrc0, isrc0);
1949 string src1 = !if(Src1HasMods, fsrc1, isrc1);
1950 string src2 = !if(Src2HasMods, fsrc2, isrc2);
1952 string clamp = !if(HasClamp, "$clamp", "");
1954 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
// Asm string for DPP: operand list (with or without modifier tokens)
// followed by the DPP control operands.
1957 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1958 string dst = !if(HasDst,
1959 !if(!eq(DstVT.Size, 1),
1962 ""); // use $sdst for VOPC
1963 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1964 string src1 = !if(!eq(NumSrcArgs, 1), "",
1965 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1966 " $src1_modifiers,"));
1967 string args = !if(!eq(HasModifiers, 0),
1968 getAsm32<0, NumSrcArgs, DstVT>.ret,
1970 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
// DPP16 appends the $fi operand to the plain DPP string.
1973 class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1974 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
// DPP8 swaps the DPP control string for "$dpp8$fi".
1977 class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1978 string dst = !if(HasDst,
1979 !if(!eq(DstVT.Size, 1),
1982 ""); // use $sdst for VOPC
1983 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1984 string src1 = !if(!eq(NumSrcArgs, 1), "",
1985 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1986 " $src1_modifiers,"));
1987 string args = !if(!eq(HasModifiers, 0),
1988 getAsm32<0, NumSrcArgs, DstVT>.ret,
1990 string ret = dst#args#"$dpp8$fi";
// Asm string for pre-GFX9 SDWA: "vcc" token dst for VOPC, then modifier
// sources, then the sel operand string (shape depends on arity and dst).
1993 class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1994 string dst = !if(HasDst,
1995 !if(!eq(DstVT.Size, 1),
1996 " vcc", // use vcc token as dst for VOPC instructions
1999 string src0 = "$src0_modifiers";
2000 string src1 = "$src1_modifiers";
2001 string args = !if(!eq(NumSrcArgs, 0),
2003 !if(!eq(NumSrcArgs, 1),
2005 ", "#src0#", "#src1#"$clamp"
2008 string sdwa = !if(!eq(NumSrcArgs, 0),
2010 !if(!eq(NumSrcArgs, 1),
2011 " $dst_sel $dst_unused $src0_sel",
2012 !if(!eq(DstVT.Size, 1),
2013 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
2014 " $dst_sel $dst_unused $src0_sel $src1_sel"
2018 string ret = dst#args#sdwa;
// GFX9+ SDWA: like getAsmSDWA but output modifiers ($clamp, optionally
// $omod) are folded into the sel string instead of the args string.
2021 class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
2022 ValueType DstVT = i32> {
2023 string dst = !if(HasDst,
2024 !if(!eq(DstVT.Size, 1),
2028 string src0 = "$src0_modifiers";
2029 string src1 = "$src1_modifiers";
2030 string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
2031 string args = !if(!eq(NumSrcArgs, 0), "",
2032 !if(!eq(NumSrcArgs, 1),
2037 string sdwa = !if(!eq(NumSrcArgs, 0), "",
2038 !if(!eq(NumSrcArgs, 1),
2039 out_mods#" $dst_sel $dst_unused $src0_sel",
2040 !if(!eq(DstVT.Size, 1),
2041 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
2042 out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
2046 string ret = dst#args#sdwa;
2050 // Function that checks if instruction supports DPP and SDWA
// Extensions are unavailable for VOP3 (3 sources) and for any 64-bit
// dst/src operand.
2051 class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2052 ValueType Src1VT = i32> {
2053 bit ret = !if(!eq(NumSrcArgs, 3),
2054 0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
2055 !if(!eq(DstVT.Size, 64),
2056 0, // 64-bit dst - No DPP or SDWA for 64-bit operands
2057 !if(!eq(Src0VT.Size, 64),
2059 !if(!eq(Src1VT.Size, 64),
// DPP additionally requires at least one source operand.
2068 class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2069 ValueType Src1VT = i32> {
2070 bit ret = !if(!eq(NumSrcArgs, 0), 0,
2071 getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
// Boolean helpers expressed with !if (TableGen bit logic).
2074 class BitOr<bit a, bit b> {
2075 bit ret = !if(a, 1, !if(b, 1, 0));
2078 class BitAnd<bit a, bit b> {
2079 bit ret = !if(a, !if(b, 1, 0), 0);
// VOPProfile: central operand/encoding descriptor for VALU instructions.
// ArgVT holds [DstVT, Src0VT, Src1VT, Src2VT]; the derived fields below
// select register classes, modifier operands, ins/outs dags and assembly
// strings for each encoding flavor (e32, e64/VOP3, DPP, SDWA, VOP3P).
2087 class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
2088 bit _EnableClamp = 0> {
2090 field list<ValueType> ArgVT = _ArgVT;
2091 field bit EnableF32SrcMods = _EnableF32SrcMods;
2092 field bit EnableClamp = _EnableClamp;
// Destination and source value types, unpacked from ArgVT.
2094 field ValueType DstVT = ArgVT[0];
2095 field ValueType Src0VT = ArgVT[1];
2096 field ValueType Src1VT = ArgVT[2];
2097 field ValueType Src2VT = ArgVT[3];
// Register classes/operands per encoding (32-bit, VOP3/64-bit, DPP, SDWA).
2098 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2099 field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
2100 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2101 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
2102 field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
2103 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2104 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2105 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2106 field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
2107 field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
2108 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
// NOTE(review): Src1SDWA is keyed on Src0VT, not Src1VT — confirm this is
// intentional (both SDWA sources sharing the src0 class) and not a typo.
2109 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
// Source-modifier operand definitions per encoding.
2110 field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
2111 field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
2112 field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
2113 field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
2114 field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
2115 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2116 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
// Operand-presence bits: an operand exists iff its type is not 'untyped'.
2119 field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
2120 field bit HasDst32 = HasDst;
2121 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2122 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2123 field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
2124 field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
2125 field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
2127 // TODO: Modifiers logic is somewhat adhoc here, to be refined later
2128 // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
2129 // enables modifiers for i32 type.
2130 field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
2132 // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
2133 field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
2134 field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
2135 field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
2137 // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
2138 field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2139 field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2140 field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
// src1/src2 modifiers only exist when the profile has modifiers at all.
2142 field bit HasSrc0Mods = HasModifiers;
2143 field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
2144 field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
// Clamp/omod availability, derived from operand types and EnableClamp.
2146 field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
2147 field bit HasSDWAClamp = EmitDst;
2148 field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
2149 field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
2150 field bit HasClampLo = HasClamp;
// clamp_hi only exists for packed destinations.
2151 field bit HasClampHi = BitAnd<isPackedType<DstVT>.ret, HasClamp>.ret;
2152 field bit HasHigh = 0;
// Packed (e.g. v2f16) sources get op_sel; op_sel and omod are exclusive.
2154 field bit IsPacked = isPackedType<Src0VT>.ret;
2155 field bit HasOpSel = IsPacked;
2156 field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
2157 field bit HasSDWAOMod = isFloatType<DstVT>.ret;
// Which extended encodings (DPP/SDWA variants) are legal for this profile.
2159 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2160 field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2161 field bit HasExtSDWA = HasExt;
2162 field bit HasExtSDWA9 = HasExt;
2163 field int NeedPatGen = PatGenMode.NoPattern;
2165 field bit IsMAI = 0;
2166 field bit IsDOT = 0;
// Packed-operand modifier operands: f16 vs i16 flavor per source.
2168 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2169 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2170 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
// Outs/ins dags for each encoding flavor.
2172 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2174 // VOP3b instructions are a special case with a second explicit
2175 // output. This is manually overridden for them.
2176 field dag Outs32 = Outs;
2177 field dag Outs64 = Outs;
2178 field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
2179 field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
2180 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
2182 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2183 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2184 HasIntClamp, HasModifiers, HasSrc2Mods,
2185 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2186 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2187 NumSrcArgs, HasClamp,
2188 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2189 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2192 getOpSelMod<Src0VT>.ret,
2193 getOpSelMod<Src1VT>.ret,
2194 getOpSelMod<Src2VT>.ret>.ret;
// DPP ins only built when the DPP encoding is legal for this profile.
2195 field dag InsDPP = !if(HasExtDPP,
2196 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
2197 HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
2199 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
2200 HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
2201 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
2202 Src0ModDPP, Src1ModDPP>.ret;
2203 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2204 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
// Assembly strings for each encoding flavor.
2208 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2209 field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
2210 field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
2211 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2215 HasSrc2FloatMods>.ret;
2216 field string AsmDPP = !if(HasExtDPP,
2217 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2218 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2219 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
2220 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2221 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
// DPP ties the destination to the previous value via the $old operand.
2223 field string TieRegDPP = "$old";
// Copy of profile p with the extended (SDWA9, among others) encodings
// disabled.
2226 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2230 let HasExtSDWA9 = 0;
// Copy of profile p that overrides the pattern-generation mode
// (defaults to PatGenMode.Pattern).
2233 class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
2234 let NeedPatGen = mode;
// Common VOPProfile instantiations. Naming convention:
// VOP_<DstVT>_<Src0VT>[_<Src1VT>[_<Src2VT>]], with 'untyped' for unused slots.
// 16-bit scalar profiles.
2237 def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
2238 def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2239 def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2241 def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2242 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2243 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2244 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
2246 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2247 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2249 def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
// Packed 16-bit (v2f16/v2i16) profiles.
2251 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2252 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
2253 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2255 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2256 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2257 def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2258 def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2260 def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
// Profile with no operands at all.
2262 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
// 32/64-bit unary profiles (conversions, moves).
2264 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2265 def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2266 def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2267 def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2268 def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2269 def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2270 def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2271 def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2272 def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2273 def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2274 def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
// 32/64-bit binary profiles.
2276 def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2277 def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2278 def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2279 def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2280 def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2281 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2282 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2283 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
// Integer arithmetic with clamp (saturation) enabled explicitly.
2284 def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
2285 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2286 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
// 64-bit integer profiles (shifts etc.).
2288 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2289 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2290 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
// Three-source (VOP3) profiles.
2292 def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2293 def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2294 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2295 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2296 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2297 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2298 def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2299 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2300 def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
2302 def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2303 def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
// Profiles whose dst and src2 are wide vector accumulators of matching type.
2305 def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
2306 def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
2307 def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
2308 def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
2309 def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2310 def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2311 def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
2312 def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2313 def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2314 def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
2315 def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
2316 def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
// Marker mixin pairing an opcode with its reversed-operand form. RevOp is
// the shared key; IsOrig distinguishes the original from the commuted
// variant (consumed by the getCommuteOrig/getCommuteRev mappings below).
2318 class Commutable_REV <string revOp, bit isOrig> {
2319 string RevOp = revOp;
2320 bit IsOrig = isOrig;
// Marker mixin linking an atomic opcode to its no-return form. NoRetOp is
// the shared key (consumed by getAtomicRetOp/getAtomicNoRetOp below).
2323 class AtomicNoRet <string noRetOp, bit isRet> {
2324 string NoRetOp = noRetOp;
2328 //===----------------------------------------------------------------------===//
2329 // Interpolation opcodes
2330 //===----------------------------------------------------------------------===//
// Destination operand for VINTRP instructions, printed with printVINTRPDst.
2332 class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
// CodeGen-only pseudo for a VINTRP instruction; real per-subtarget
// encodings are attached via SIMCInstr with SIEncodingFamily.NONE as key.
2334 class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2335 VINTRPCommon <outs, ins, "", pattern>,
2336 SIMCInstr<opName, SIEncodingFamily.NONE> {
2338 let isCodeGenOnly = 1;
2341 // FIXME-GFX10: WIP.
// Real (encoded) VINTRP instruction for SI-era subtargets; the encoding
// family is a parameter so this class is reused for GFX10 below.
2342 class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2343 string asm, int encodingFamily> :
2344 VINTRPCommon <outs, ins, asm, []>,
2346 SIMCInstr<opName, encodingFamily> {
2347 let DisableDecoder = DisableSIDecoder;
// Real (encoded) VINTRP instruction for the VI (GFX8) encoding family.
2350 class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2352 VINTRPCommon <outs, ins, asm, []>,
2354 SIMCInstr<opName, SIEncodingFamily.VI> {
2355 let AssemblerPredicate = VIAssemblerPredicate;
2356 let DecoderNamespace = "GFX8";
2357 let DisableDecoder = DisableVIDecoder;
2360 // FIXME-GFX10: WIP.
// Emits the pseudo plus the _si (GFX6/7), _vi (GFX8) and _gfx10 real
// definitions for one VINTRP opcode. Note _gfx10 reuses VINTRP_Real_si
// with the GFX10 encoding family.
2361 multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2362 list<dag> pattern = []> {
2363 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2365 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2366 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2367 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2369 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2371 let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
2372 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2373 } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
2375 //===----------------------------------------------------------------------===//
2376 // Vector instruction mappings
2377 //===----------------------------------------------------------------------===//
2379 // Maps an opcode in e32 form to its e64 equivalent
// Keyed on shared OpName; the (Size, VOP3) column pair distinguishes the
// 4-byte e32 encoding from the 8-byte VOP3 encoding.
2380 def getVOPe64 : InstrMapping {
2381 let FilterClass = "VOP";
2382 let RowFields = ["OpName"];
2383 let ColFields = ["Size", "VOP3"];
2384 let KeyCol = ["4", "0"];
2385 let ValueCols = [["8", "1"]];
2388 // Maps an opcode in e64 form to its e32 equivalent
// Inverse of getVOPe64: key on the 8-byte VOP3 form, yield the 4-byte form.
2389 def getVOPe32 : InstrMapping {
2390 let FilterClass = "VOP";
2391 let RowFields = ["OpName"];
2392 let ColFields = ["Size", "VOP3"];
2393 let KeyCol = ["8", "1"];
2394 let ValueCols = [["4", "0"]];
2397 // Maps ordinary instructions to their SDWA counterparts
// These three mappings all key on OpName and use AsmVariantName
// ("Default"/"SDWA"/"DPP") as the column selector.
2398 def getSDWAOp : InstrMapping {
2399 let FilterClass = "VOP";
2400 let RowFields = ["OpName"];
2401 let ColFields = ["AsmVariantName"];
2402 let KeyCol = ["Default"];
2403 let ValueCols = [["SDWA"]];
2406 // Maps SDWA instructions to their ordinary counterparts
2407 def getBasicFromSDWAOp : InstrMapping {
2408 let FilterClass = "VOP";
2409 let RowFields = ["OpName"];
2410 let ColFields = ["AsmVariantName"];
2411 let KeyCol = ["SDWA"];
2412 let ValueCols = [["Default"]];
2415 // Maps ordinary instructions to their DPP counterparts
2416 def getDPPOp32 : InstrMapping {
2417 let FilterClass = "VOP";
2418 let RowFields = ["OpName"];
2419 let ColFields = ["AsmVariantName"];
2420 let KeyCol = ["Default"];
2421 let ValueCols = [["DPP"]];
2424 // Maps a commuted opcode to its original version
// Keyed on the shared RevOp name from Commutable_REV; IsOrig selects
// between the original (1) and commuted (0) definitions.
2425 def getCommuteOrig : InstrMapping {
2426 let FilterClass = "Commutable_REV";
2427 let RowFields = ["RevOp"];
2428 let ColFields = ["IsOrig"];
2430 let ValueCols = [["1"]];
2433 // Maps an original opcode to its commuted version
2434 def getCommuteRev : InstrMapping {
2435 let FilterClass = "Commutable_REV";
2436 let RowFields = ["RevOp"];
2437 let ColFields = ["IsOrig"];
2439 let ValueCols = [["0"]];
// Maps a pseudo instruction (SIEncodingFamily.NONE) to its real MC opcode
// for each encoding family. The ValueCols order must match the
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp (see file-top comment).
2442 def getMCOpcodeGen : InstrMapping {
2443 let FilterClass = "SIMCInstr";
2444 let RowFields = ["PseudoInstr"];
2445 let ColFields = ["Subtarget"];
2446 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2447 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2448 [!cast<string>(SIEncodingFamily.VI)],
2449 [!cast<string>(SIEncodingFamily.SDWA)],
2450 [!cast<string>(SIEncodingFamily.SDWA9)],
2451 // GFX80 encoding is added to work around a multiple matching
2452 // issue for buffer instructions with unpacked d16 data. This
2453 // does not actually change the encoding, and thus may be
2455 [!cast<string>(SIEncodingFamily.GFX80)],
2456 [!cast<string>(SIEncodingFamily.GFX9)],
2457 [!cast<string>(SIEncodingFamily.GFX10)],
2458 [!cast<string>(SIEncodingFamily.SDWA10)]];
2461 // Get equivalent SOPK instruction.
// Maps a base compare opcode to its SOPK form via the SOPKInstTable marker.
2462 def getSOPKOp : InstrMapping {
2463 let FilterClass = "SOPKInstTable";
2464 let RowFields = ["BaseCmpOp"];
2465 let ColFields = ["IsSOPK"];
2467 let ValueCols = [["1"]];
// Maps a MUBUF opcode to its Addr64 form via the MUBUFAddr64Table marker.
2470 def getAddr64Inst : InstrMapping {
2471 let FilterClass = "MUBUFAddr64Table";
2472 let RowFields = ["OpName"];
2473 let ColFields = ["IsAddr64"];
2475 let ValueCols = [["1"]];
// Companion query over the same table (differs from getAddr64Inst in its
// elided KeyCol) yielding the Addr64 form.
2478 def getIfAddr64Inst : InstrMapping {
2479 let FilterClass = "MUBUFAddr64Table";
2480 let RowFields = ["OpName"];
2481 let ColFields = ["IsAddr64"];
2483 let ValueCols = [["1"]];
// Maps an LDS-enabled MUBUF opcode to its non-LDS form.
2486 def getMUBUFNoLdsInst : InstrMapping {
2487 let FilterClass = "MUBUFLdsTable";
2488 let RowFields = ["OpName"];
2489 let ColFields = ["IsLds"];
2491 let ValueCols = [["0"]];
2494 // Maps an atomic opcode to its version with a return value.
// Both atomic mappings key on the shared NoRetOp name from AtomicNoRet.
2495 def getAtomicRetOp : InstrMapping {
2496 let FilterClass = "AtomicNoRet";
2497 let RowFields = ["NoRetOp"];
2498 let ColFields = ["IsRet"];
2500 let ValueCols = [["1"]];
2503 // Maps an atomic opcode to its returnless version.
2504 def getAtomicNoRetOp : InstrMapping {
2505 let FilterClass = "AtomicNoRet";
2506 let RowFields = ["NoRetOp"];
2507 let ColFields = ["IsRet"];
2509 let ValueCols = [["0"]];
2512 // Maps a GLOBAL to its SADDR form.
2513 def getGlobalSaddrOp : InstrMapping {
2514 let FilterClass = "GlobalSaddrTable";
2515 let RowFields = ["SaddrOp"];
2516 let ColFields = ["IsSaddr"];
2518 let ValueCols = [["1"]];
2521 // Maps a v_cmpx opcode with sdst to opcode without sdst.
2522 def getVCMPXNoSDstOp : InstrMapping {
2523 let FilterClass = "VCMPXNoSDstTable";
2524 let RowFields = ["NoSDstOp"];
2525 let ColFields = ["HasSDst"];
2527 let ValueCols = [["0"]];
2530 // Maps a SOPP to a SOPP with S_NOP
// Keyed on the full AsmString; the relaxed variant is the 8-byte form.
2531 def getSOPPWithRelaxation : InstrMapping {
2532 let FilterClass = "Base_SOPP";
2533 let RowFields = ["AsmString"];
2534 let ColFields = ["Size"];
2536 let ValueCols = [["8"]];
2539 include "SIInstructions.td"
2541 include "DSInstructions.td"
2542 include "MIMGInstructions.td"