1 //===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Wavefront-size predicates. Used both as codegen predicates (checked against
// Subtarget->getWavefrontSize()) and as assembler predicates gating
// wave32-/wave64-only instruction encodings.
9 def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
10 AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
11 def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
12 AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
14 class GCNPredicateControl : PredicateControl {
15 Predicate SIAssemblerPredicate = isGFX6GFX7;
16 Predicate VIAssemblerPredicate = isGFX8GFX9;
19 // Except for the NONE field, this must be kept in sync with the
20 // SIEncodingFamily enum in AMDGPUInstrInfo.cpp
21 def SIEncodingFamily {
34 //===----------------------------------------------------------------------===//
36 //===----------------------------------------------------------------------===//
38 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
40 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
41 SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
42 [SDNPMayLoad, SDNPMemOperand]
45 def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
46 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
47 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
50 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
51 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
54 def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
55 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
58 def SIatomic_csub : SDNode<"AMDGPUISD::ATOMIC_LOAD_CSUB", SDTAtomic2,
59 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
62 def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
63 SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
66 def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
67 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
70 def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
71 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
74 // load_d16_{lo|hi} ptr, tied_input
75 def SIload_d16 : SDTypeProfile<1, 2, [
81 def SDTtbuffer_load : SDTypeProfile<1, 8,
83 SDTCisVT<1, v4i32>, // rsrc
84 SDTCisVT<2, i32>, // vindex(VGPR)
85 SDTCisVT<3, i32>, // voffset(VGPR)
86 SDTCisVT<4, i32>, // soffset(SGPR)
87 SDTCisVT<5, i32>, // offset(imm)
88 SDTCisVT<6, i32>, // format(imm)
89 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
90 SDTCisVT<8, i1> // idxen(imm)
// Typed-buffer load: chained memory read using the SDTtbuffer_load operand
// profile (rsrc, vindex, voffset, soffset, offset, format, cachepolicy, idxen).
93 def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
94 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
95 def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
97 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
99 def SDTtbuffer_store : SDTypeProfile<0, 9,
101 SDTCisVT<1, v4i32>, // rsrc
102 SDTCisVT<2, i32>, // vindex(VGPR)
103 SDTCisVT<3, i32>, // voffset(VGPR)
104 SDTCisVT<4, i32>, // soffset(SGPR)
105 SDTCisVT<5, i32>, // offset(imm)
106 SDTCisVT<6, i32>, // format(imm)
107 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
108 SDTCisVT<8, i1> // idxen(imm)
// Typed-buffer store: chained memory write using the SDTtbuffer_store operand
// profile; store counterpart of SItbuffer_load.
111 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
112 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
113 def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
115 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
117 def SDTBufferLoad : SDTypeProfile<1, 7,
119 SDTCisVT<1, v4i32>, // rsrc
120 SDTCisVT<2, i32>, // vindex(VGPR)
121 SDTCisVT<3, i32>, // voffset(VGPR)
122 SDTCisVT<4, i32>, // soffset(SGPR)
123 SDTCisVT<5, i32>, // offset(imm)
124 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
125 SDTCisVT<7, i1>]>; // idxen(imm)
// Buffer-load DAG nodes. All share the SDTBufferLoad operand profile
// (rsrc, vindex, voffset, soffset, offset, cachepolicy, idxen) and differ
// only in the element width / sign- vs zero-extension encoded in the opcode.
127 def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
128 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
129 def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
130 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
131 def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
132 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
133 def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
134 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
135 def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
136 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
137 def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
138 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
139 def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
141 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
143 def SDTBufferStore : SDTypeProfile<0, 8,
145 SDTCisVT<1, v4i32>, // rsrc
146 SDTCisVT<2, i32>, // vindex(VGPR)
147 SDTCisVT<3, i32>, // voffset(VGPR)
148 SDTCisVT<4, i32>, // soffset(SGPR)
149 SDTCisVT<5, i32>, // offset(imm)
150 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
151 SDTCisVT<7, i1>]>; // idxen(imm)
// Full-width buffer store (no truncation); uses the SDTBufferStore profile.
153 def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
154 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
155 def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
157 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
158 def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
160 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
161 def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
163 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
164 def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
166 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
168 class SDBufferAtomic<string opcode> : SDNode <opcode,
170 [SDTCisVT<2, v4i32>, // rsrc
171 SDTCisVT<3, i32>, // vindex(VGPR)
172 SDTCisVT<4, i32>, // voffset(VGPR)
173 SDTCisVT<5, i32>, // soffset(SGPR)
174 SDTCisVT<6, i32>, // offset(imm)
175 SDTCisVT<7, i32>, // cachepolicy(imm)
176 SDTCisVT<8, i1>]>, // idxen(imm)
177 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
180 class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
182 [SDTCisVT<0, ty>, // vdata
183 SDTCisVT<1, v4i32>, // rsrc
184 SDTCisVT<2, i32>, // vindex(VGPR)
185 SDTCisVT<3, i32>, // voffset(VGPR)
186 SDTCisVT<4, i32>, // soffset(SGPR)
187 SDTCisVT<5, i32>, // offset(imm)
188 SDTCisVT<6, i32>, // cachepolicy(imm)
189 SDTCisVT<7, i1>]>, // idxen(imm)
190 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
// Buffer atomic read-modify-write nodes. The integer ops use SDBufferAtomic
// (value-producing variant); fadd/pk_fadd use SDBufferAtomicNoRtn, whose
// profile carries the vdata operand but produces no result value.
193 def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
194 def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
195 def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
196 def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
197 def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
198 def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
199 def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
200 def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
201 def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
202 def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
203 def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
204 def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
205 def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
206 def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
207 def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
209 def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
211 [SDTCisVT<0, i32>, // dst
212 SDTCisVT<1, i32>, // src
213 SDTCisVT<2, i32>, // cmp
214 SDTCisVT<3, v4i32>, // rsrc
215 SDTCisVT<4, i32>, // vindex(VGPR)
216 SDTCisVT<5, i32>, // voffset(VGPR)
217 SDTCisVT<6, i32>, // soffset(SGPR)
218 SDTCisVT<7, i32>, // offset(imm)
219 SDTCisVT<8, i32>, // cachepolicy(imm)
220 SDTCisVT<9, i1>]>, // idxen(imm)
221 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
224 class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
226 [SDTCisPtrTy<0>, // vaddr
227 SDTCisVT<1, ty>]>, // vdata
228 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
231 def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
233 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
234 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
237 def SIlds : SDNode<"AMDGPUISD::LDS",
238 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
241 def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
243 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
246 def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
248 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
251 def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
253 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
256 def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
258 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
261 def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
263 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
266 def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
268 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
271 def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
272 SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
273 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
276 //===----------------------------------------------------------------------===//
278 //===----------------------------------------------------------------------===//
280 // Returns 1 if the source arguments have modifiers, 0 if they do not.
281 // XXX - do f16 instructions?
282 class isFloatType<ValueType SrcVT> {
284 !if(!eq(SrcVT.Value, f16.Value), 1,
285 !if(!eq(SrcVT.Value, f32.Value), 1,
286 !if(!eq(SrcVT.Value, f64.Value), 1,
287 !if(!eq(SrcVT.Value, v2f16.Value), 1,
288 !if(!eq(SrcVT.Value, v4f16.Value), 1,
289 !if(!eq(SrcVT.Value, v2f32.Value), 1,
290 !if(!eq(SrcVT.Value, v2f64.Value), 1,
294 class isIntType<ValueType SrcVT> {
296 !if(!eq(SrcVT.Value, i16.Value), 1,
297 !if(!eq(SrcVT.Value, i32.Value), 1,
298 !if(!eq(SrcVT.Value, i64.Value), 1,
302 class isPackedType<ValueType SrcVT> {
304 !if(!eq(SrcVT.Value, v2i16.Value), 1,
305 !if(!eq(SrcVT.Value, v2f16.Value), 1,
306 !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
310 //===----------------------------------------------------------------------===//
311 // PatFrags for global memory operations
312 //===----------------------------------------------------------------------===//
314 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_global").AddrSpaces in {
315 defm atomic_csub_global : binary_atomic_op<SIatomic_csub>;
318 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
319 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
322 defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
323 defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
324 defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
325 defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
328 } // End let AddressSpaces = ...
329 } // End foreach AddrSpace
331 def atomic_fadd_global_noret : PatFrag<
332 (ops node:$ptr, node:$value),
333 (atomic_load_fadd node:$ptr, node:$value)> {
337 let AddressSpaces = StoreAddress_global.AddrSpaces;
340 def atomic_pk_fadd_global_noret : PatFrag<
341 (ops node:$ptr, node:$value),
342 (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
344 let MemoryVT = v2f16;
346 let AddressSpaces = StoreAddress_global.AddrSpaces;
349 //===----------------------------------------------------------------------===//
350 // SDNodes PatFrags for loads/stores with a glue input.
351 // This is for SDNodes and PatFrag for local loads and stores to
352 // enable s_mov_b32 m0, -1 to be glued to the memory instructions.
354 // These mirror the regular load/store PatFrags and rely on special
355 // processing during Select() to add the glued copy.
357 //===----------------------------------------------------------------------===//
359 def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
360 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
363 def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
364 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
367 def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
372 def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
374 let IsNonExtLoad = 1;
377 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
378 (AMDGPUatomic_ld_glue node:$ptr)> {
383 def atomic_load_64_glue : PatFrag<(ops node:$ptr),
384 (AMDGPUatomic_ld_glue node:$ptr)> {
389 def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
391 let IsAnyExtLoad = 1;
394 def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
396 let IsSignExtLoad = 1;
399 def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
401 let IsZeroExtLoad = 1;
404 def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
409 def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
414 def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
419 def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
424 def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
429 def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
435 let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
436 def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
437 let IsNonExtLoad = 1;
440 let MemoryVT = i8 in {
441 def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
442 def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
443 def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
446 let MemoryVT = i16 in {
447 def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
448 def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
449 def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
452 def load_align8_local_m0 : PatFrag<(ops node:$ptr),
453 (load_local_m0 node:$ptr)> {
455 let IsNonExtLoad = 1;
456 let MinAlignment = 8;
458 def load_align16_local_m0 : PatFrag<(ops node:$ptr),
459 (load_local_m0 node:$ptr)> {
461 let IsNonExtLoad = 1;
462 let MinAlignment = 16;
467 let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
468 def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
469 (atomic_load_32_glue node:$ptr)> {
472 def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
473 (atomic_load_64_glue node:$ptr)> {
477 } // End let AddressSpaces = LoadAddress_local.AddrSpaces
480 def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
481 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
484 def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
485 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
488 def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
489 (AMDGPUst_glue node:$val, node:$ptr)> {
494 def store_glue : PatFrag<(ops node:$val, node:$ptr),
495 (unindexedstore_glue node:$val, node:$ptr)> {
497 let IsTruncStore = 0;
500 def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
501 (unindexedstore_glue node:$val, node:$ptr)> {
503 let IsTruncStore = 1;
506 def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
507 (truncstore_glue node:$val, node:$ptr)> {
512 def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
513 (truncstore_glue node:$val, node:$ptr)> {
518 let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
519 def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
520 (store_glue node:$val, node:$ptr)> {
522 let IsTruncStore = 0;
525 def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
526 (unindexedstore_glue node:$val, node:$ptr)> {
531 def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
532 (unindexedstore_glue node:$val, node:$ptr)> {
538 def store_align16_local_m0 : PatFrag <
539 (ops node:$value, node:$ptr),
540 (store_local_m0 node:$value, node:$ptr)> {
542 let IsTruncStore = 0;
543 let MinAlignment = 16;
546 def store_align8_local_m0 : PatFrag <
547 (ops node:$value, node:$ptr),
548 (store_local_m0 node:$value, node:$ptr)> {
550 let IsTruncStore = 0;
551 let MinAlignment = 8;
554 let AddressSpaces = StoreAddress_local.AddrSpaces in {
556 def atomic_store_local_32_m0 : PatFrag <
557 (ops node:$value, node:$ptr),
558 (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
562 def atomic_store_local_64_m0 : PatFrag <
563 (ops node:$value, node:$ptr),
564 (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
568 } // End let AddressSpaces = StoreAddress_local.AddrSpaces
571 def si_setcc_uniform : PatFrag <
572 (ops node:$lhs, node:$rhs, node:$cond),
573 (setcc node:$lhs, node:$rhs, node:$cond), [{
574 for (SDNode *Use : N->uses()) {
575 if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
578 unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
579 if (Reg != AMDGPU::SCC)
585 //===----------------------------------------------------------------------===//
586 // SDNodes PatFrags for d16 loads
587 //===----------------------------------------------------------------------===//
589 class LoadD16Frag <SDPatternOperator op> : PatFrag<
590 (ops node:$ptr, node:$tied_in),
591 (op node:$ptr, node:$tied_in)> {
595 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
596 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
598 def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;
600 def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
604 def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
608 def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;
610 def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
614 def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
618 } // End let AddressSpaces = ...
619 } // End foreach AddrSpace
621 def lshr_rev : PatFrag <
622 (ops node:$src1, node:$src0),
626 def ashr_rev : PatFrag <
627 (ops node:$src1, node:$src0),
631 def lshl_rev : PatFrag <
632 (ops node:$src1, node:$src0),
636 def add_ctpop : PatFrag <
637 (ops node:$src0, node:$src1),
638 (add (ctpop $src0), $src1)
642 def shl#I#_add : PatFrag <
643 (ops node:$src0, node:$src1),
644 (add (shl_oneuse $src0, (i32 I)), $src1)> {
645 // FIXME: Poor substitute for disabling pattern in SelectionDAG
646 let PredicateCode = [{return false;}];
647 let GISelPredicateCode = [{return true;}];
651 multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
652 SDTypeProfile tc = SDTAtomic2,
656 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
657 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
660 let AddressSpaces = StoreAddress_local.AddrSpaces in {
661 defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
664 let AddressSpaces = StoreAddress_region.AddrSpaces in {
665 defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
// Instantiate the M0-glued atomic PatFrags for local/region address spaces.
// The second template argument selects the opcode namespace (1 = AMDGPUISD::,
// default 0 = ISD:: — see the !if in SIAtomicM0Glue2). The FP variants pass
// the SDTAtomic2_f32 profile and a final 0; presumably that flag marks the
// op as non-integer for binary_atomic_op — TODO confirm against the (elided)
// multiclass parameter list.
669 defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
670 defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
671 defm atomic_load_csub : SIAtomicM0Glue2 <"LOAD_CSUB", 1>;
672 defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
673 defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
674 defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
675 defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
676 defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
677 defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
678 defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
679 defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
680 defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
681 defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
682 defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
683 defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
684 defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
686 def as_i1timm : SDNodeXForm<timm, [{
687 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
690 def as_i8imm : SDNodeXForm<imm, [{
691 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
694 def as_i8timm : SDNodeXForm<timm, [{
695 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
698 def as_i16imm : SDNodeXForm<imm, [{
699 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
702 def as_i16timm : SDNodeXForm<timm, [{
703 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
706 def as_i32imm: SDNodeXForm<imm, [{
707 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
710 def as_i32timm: SDNodeXForm<timm, [{
711 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
714 def as_i64imm: SDNodeXForm<imm, [{
715 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
718 def cond_as_i32imm: SDNodeXForm<cond, [{
719 return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
722 // Copied from the AArch64 backend:
723 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
724 return CurDAG->getTargetConstant(
725 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
728 def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
729 auto FI = cast<FrameIndexSDNode>(N);
730 return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
733 // Copied from the AArch64 backend:
734 def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
735 return CurDAG->getTargetConstant(
736 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
739 class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
740 uint64_t Imm = N->getZExtValue();
741 unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
742 return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
745 def SIMM16bit : ImmLeaf <i32,
746 [{return isInt<16>(Imm);}]
749 def UIMM16bit : ImmLeaf <i32,
750 [{return isUInt<16>(Imm);}]
753 def i64imm_32bit : ImmLeaf<i64, [{
754 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
757 def InlineImm16 : ImmLeaf<i16, [{
758 return isInlineImmediate16(Imm);
761 def InlineImm32 : ImmLeaf<i32, [{
762 return isInlineImmediate32(Imm);
765 def InlineImm64 : ImmLeaf<i64, [{
766 return isInlineImmediate64(Imm);
769 def InlineImmFP32 : FPImmLeaf<f32, [{
770 return isInlineImmediate(Imm);
773 def InlineImmFP64 : FPImmLeaf<f64, [{
774 return isInlineImmediate(Imm);
778 class VGPRImm <dag frag> : PatLeaf<frag, [{
782 def NegateImm : SDNodeXForm<imm, [{
783 return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
786 // TODO: When FP inline imm values work?
787 def NegSubInlineConst32 : ImmLeaf<i32, [{
788 return Imm < -16 && Imm >= -64;
791 def NegSubInlineIntConst16 : ImmLeaf<i16, [{
792 return Imm < -16 && Imm >= -64;
795 def ShiftAmt32Imm : ImmLeaf <i32, [{
799 def getNegV2I16Imm : SDNodeXForm<build_vector, [{
800 return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
803 def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
804 assert(N->getNumOperands() == 2);
805 assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
806 SDValue Src0 = N->getOperand(0);
807 SDValue Src1 = N->getOperand(1);
809 return isNegInlineImmediate(Src0.getNode());
811 return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
812 (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
815 //===----------------------------------------------------------------------===//
816 // MUBUF/SMEM Patterns
817 //===----------------------------------------------------------------------===//
819 def extract_glc : SDNodeXForm<timm, [{
820 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
823 def extract_slc : SDNodeXForm<timm, [{
824 return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
827 def extract_dlc : SDNodeXForm<timm, [{
828 return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
831 def extract_swz : SDNodeXForm<timm, [{
832 return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
835 //===----------------------------------------------------------------------===//
837 //===----------------------------------------------------------------------===//
839 def SoppBrTarget : AsmOperandClass {
840 let Name = "SoppBrTarget";
841 let ParserMethod = "parseSOppBrTarget";
844 def sopp_brtarget : Operand<OtherVT> {
845 let EncoderMethod = "getSOPPBrEncoding";
846 let DecoderMethod = "decodeSoppBrTarget";
847 let OperandType = "OPERAND_PCREL";
848 let ParserMatchClass = SoppBrTarget;
851 def si_ga : Operand<iPTR>;
853 def InterpSlotMatchClass : AsmOperandClass {
854 let Name = "InterpSlot";
855 let PredicateMethod = "isInterpSlot";
856 let ParserMethod = "parseInterpSlot";
857 let RenderMethod = "addImmOperands";
860 def InterpSlot : Operand<i32> {
861 let PrintMethod = "printInterpSlot";
862 let ParserMatchClass = InterpSlotMatchClass;
863 let OperandType = "OPERAND_IMMEDIATE";
866 def AttrMatchClass : AsmOperandClass {
868 let PredicateMethod = "isInterpAttr";
869 let ParserMethod = "parseInterpAttr";
870 let RenderMethod = "addImmOperands";
873 // It appears to be necessary to create a separate operand for this to
874 // be able to parse attr<num> with no space.
875 def Attr : Operand<i32> {
876 let PrintMethod = "printInterpAttr";
877 let ParserMatchClass = AttrMatchClass;
878 let OperandType = "OPERAND_IMMEDIATE";
881 def AttrChanMatchClass : AsmOperandClass {
882 let Name = "AttrChan";
883 let PredicateMethod = "isAttrChan";
884 let RenderMethod = "addImmOperands";
887 def AttrChan : Operand<i32> {
888 let PrintMethod = "printInterpAttrChan";
889 let ParserMatchClass = AttrChanMatchClass;
890 let OperandType = "OPERAND_IMMEDIATE";
893 def SendMsgMatchClass : AsmOperandClass {
894 let Name = "SendMsg";
895 let PredicateMethod = "isSendMsg";
896 let ParserMethod = "parseSendMsgOp";
897 let RenderMethod = "addImmOperands";
900 def SwizzleMatchClass : AsmOperandClass {
901 let Name = "Swizzle";
902 let PredicateMethod = "isSwizzle";
903 let ParserMethod = "parseSwizzleOp";
904 let RenderMethod = "addImmOperands";
908 def EndpgmMatchClass : AsmOperandClass {
909 let Name = "EndpgmImm";
910 let PredicateMethod = "isEndpgm";
911 let ParserMethod = "parseEndpgmOp";
912 let RenderMethod = "addImmOperands";
916 def ExpTgtMatchClass : AsmOperandClass {
918 let PredicateMethod = "isExpTgt";
919 let ParserMethod = "parseExpTgt";
920 let RenderMethod = "printExpTgt";
923 def SWaitMatchClass : AsmOperandClass {
924 let Name = "SWaitCnt";
925 let RenderMethod = "addImmOperands";
926 let ParserMethod = "parseSWaitCntOps";
929 def VReg32OrOffClass : AsmOperandClass {
930 let Name = "VReg32OrOff";
931 let ParserMethod = "parseVReg32OrOff";
934 let OperandType = "OPERAND_IMMEDIATE" in {
935 def SendMsgImm : Operand<i32> {
936 let PrintMethod = "printSendMsg";
937 let ParserMatchClass = SendMsgMatchClass;
940 def SwizzleImm : Operand<i16> {
941 let PrintMethod = "printSwizzle";
942 let ParserMatchClass = SwizzleMatchClass;
945 def EndpgmImm : Operand<i16> {
946 let PrintMethod = "printEndpgm";
947 let ParserMatchClass = EndpgmMatchClass;
950 def WAIT_FLAG : Operand <i32> {
951 let ParserMatchClass = SWaitMatchClass;
952 let PrintMethod = "printWaitFlag";
954 } // End OperandType = "OPERAND_IMMEDIATE"
956 include "SIInstrFormats.td"
957 include "VIInstrFormats.td"
959 def BoolReg : AsmOperandClass {
960 let Name = "BoolReg";
961 let ParserMethod = "parseBoolReg";
962 let RenderMethod = "addRegOperands";
965 class BoolRC : RegisterOperand<SReg_1> {
966 let ParserMatchClass = BoolReg;
967 let DecoderMethod = "decodeBoolReg";
970 def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
971 let ParserMatchClass = BoolReg;
972 let DecoderMethod = "decodeBoolReg";
975 def VOPDstS64orS32 : BoolRC {
976 let PrintMethod = "printVOPDst";
979 // SCSrc_i1 is the operand for pseudo instructions only.
980 // Boolean immediates shall not be exposed to codegen instructions.
981 def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
982 let OperandNamespace = "AMDGPU";
983 let OperandType = "OPERAND_REG_IMM_INT32";
984 let ParserMatchClass = BoolReg;
985 let DecoderMethod = "decodeBoolReg";
988 // ===----------------------------------------------------------------------===//
989 // ExpSrc* Special cases for exp src operands which are printed as
990 // "off" depending on en operand.
991 // ===----------------------------------------------------------------------===//
993 def ExpSrc0 : RegisterOperand<VGPR_32> {
994 let PrintMethod = "printExpSrc0";
995 let ParserMatchClass = VReg32OrOffClass;
998 def ExpSrc1 : RegisterOperand<VGPR_32> {
999 let PrintMethod = "printExpSrc1";
1000 let ParserMatchClass = VReg32OrOffClass;
1003 def ExpSrc2 : RegisterOperand<VGPR_32> {
1004 let PrintMethod = "printExpSrc2";
1005 let ParserMatchClass = VReg32OrOffClass;
1008 def ExpSrc3 : RegisterOperand<VGPR_32> {
1009 let PrintMethod = "printExpSrc3";
1010 let ParserMatchClass = VReg32OrOffClass;
1013 class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
1014 let OperandNamespace = "AMDGPU";
1015 string Type = !if(isFloatType<vt>.ret, "FP", "INT");
1016 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
1017 let DecoderMethod = "decodeSDWASrc"#vt.Size;
1018 let EncoderMethod = "getSDWASrcEncoding";
// SDWA source operand instantiations for each supported element type; the
// SDWASrc class derives operand type, decoder, and encoder from vt.
1021 def SDWASrc_i32 : SDWASrc<i32>;
1022 def SDWASrc_i16 : SDWASrc<i16>;
1023 def SDWASrc_f32 : SDWASrc<f32>;
1024 def SDWASrc_f16 : SDWASrc<f16>;
1026 def SDWAVopcDst : BoolRC {
1027 let OperandNamespace = "AMDGPU";
1028 let OperandType = "OPERAND_SDWA_VOPC_DST";
1029 let EncoderMethod = "getSDWAVopcDstEncoding";
1030 let DecoderMethod = "decodeSDWAVopcDst";
1031 let PrintMethod = "printVOPDst";
1034 class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
1035 let Name = "Imm"#CName;
1036 let PredicateMethod = "is"#CName;
1037 let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
1038 let RenderMethod = "addImmOperands";
1039 let IsOptional = Optional;
1040 let DefaultMethod = !if(Optional, "default"#CName, ?);
1043 class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
1044 let PrintMethod = "print"#Name;
1045 let ParserMatchClass = MatchClass;
1048 class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
1049 OperandWithDefaultOps<i1, (ops (i1 0))> {
1050 let PrintMethod = "print"#Name;
1051 let ParserMatchClass = MatchClass;
1054 class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
1055 let PrintMethod = "print"#Name;
1056 let ParserMatchClass = MatchClass;
1059 class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
1060 let PrintMethod = "print"#Name;
1061 let ParserMatchClass = MatchClass;
1064 class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
1065 let PrintMethod = "print"#Name;
1066 let ParserMatchClass = MatchClass;
1069 class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
1070 OperandWithDefaultOps<i32, (ops (i32 0))> {
1071 let PrintMethod = "print"#Name;
1072 let ParserMatchClass = MatchClass;
1075 class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
1076 OperandWithDefaultOps<i32, (ops (i32 0))> {
1077 let PrintMethod = "print"#Name;
1078 let ParserMatchClass = MatchClass;
1081 let OperandType = "OPERAND_IMMEDIATE" in {
// MUBUF/MTBUF addressing-mode flags.
1083 def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
1084 def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
1085 def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
// Memory-offset immediates (flat, buffer, and the two DS offsets).
1087 def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
1088 def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
1089 def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
1090 def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
1092 def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;
// Output modifier (omod); omod0 defaults to 0.
1094 def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
1095 def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;
1097 // We need to make the cases with a default of 0 distinct from no
1098 // default to help deal with some cases where the operand appears
1099 // before a mandatory operand.
1100 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
1101 def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
1102 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
// Memory-instruction cache-policy and image flags.
1104 def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
1105 def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
1106 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
1107 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
1108 def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
1109 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
1110 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
1111 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
1112 def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
1113 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
1114 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
// EXP (export) instruction flags.
1115 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
1116 def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
1118 def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
// MIMG dmask/dim operands; Dim is mandatory (Optional = 0).
1120 def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
1121 def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
// DPP control operands (dpp8 and classic dpp_ctrl are mandatory).
1123 def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
1125 def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
1126 def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
1127 def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
1128 def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
1129 def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
// SDWA select operands.
1131 def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
1132 def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
1133 def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
1134 def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
// VOP3P packed-math operand selects and negation masks (default 0).
1136 def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
1137 def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
1138 def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
1139 def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
// MAI (matrix) instruction modifiers.
1141 def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
1142 def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
1143 def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
1145 def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
1147 def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
1151 } // End OperandType = "OPERAND_IMMEDIATE"
// Assembler match class for a "KImm" literal: a floating-point
// immediate carried in the instruction's literal dword.
1153 class KImmMatchClass<int size> : AsmOperandClass {
1154 let Name = "KImmFP"#size;
1155 let PredicateMethod = "isKImmFP"#size;
1156 let ParserMethod = "parseImm";
1157 let RenderMethod = "addKImmFP"#size#"Operands";
// Operand wrapper for a KImm literal of the given value type; looks up
// the matching KImmFP<Size>MatchClass by name.
1160 class kimmOperand<ValueType vt> : Operand<vt> {
1161 let OperandNamespace = "AMDGPU";
1162 let OperandType = "OPERAND_KIMM"#vt.Size;
1163 let PrintMethod = "printU"#vt.Size#"ImmOperand";
1164 let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
1167 // 32-bit VALU immediate operand that uses the constant bus.
1168 def KImmFP32MatchClass : KImmMatchClass<32>;
1169 def f32kimm : kimmOperand<i32>;
1171 // 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
1173 def KImmFP16MatchClass : KImmMatchClass<16>;
1174 def f16kimm : kimmOperand<i16>;
// Match class for a register-or-immediate source with floating-point
// input modifiers (abs/neg) of the given operand size.
1176 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
1177 let Name = "RegOrImmWithFP"#opSize#"InputMods";
1178 let ParserMethod = "parseRegOrImmWithFPInputMods";
1179 let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
1182 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
1183 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
1184 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
// Base operand for all input-modifier operands: an i32 immediate that
// encodes the modifier bits (see the SISrcMods enum).
1186 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
1187 let OperandNamespace = "AMDGPU";
1188 let OperandType = "OPERAND_INPUT_MODS";
1189 let ParserMatchClass = matchClass;
// FP input modifiers are printed together with their source operand.
1192 class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
1193 let PrintMethod = "printOperandAndFPInputMods";
1196 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
1197 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
1198 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
// Match class for a register-or-immediate source with integer input
// modifiers (sext) of the given operand size.
1200 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
1201 let Name = "RegOrImmWithInt"#opSize#"InputMods";
1202 let ParserMethod = "parseRegOrImmWithIntInputMods";
1203 let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
1205 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
1206 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
1208 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
1209 let PrintMethod = "printOperandAndIntInputMods";
1211 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
1212 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
// Modifier operand used for VOP3 op_sel sources; parsed like a plain
// register-or-immediate (no abs/neg syntax).
1214 class OpSelModsMatchClass : AsmOperandClass {
1215 let Name = "OpSelMods";
1216 let ParserMethod = "parseRegOrImm";
1217 let PredicateMethod = "isRegOrImm";
1220 def IntOpSelModsMatchClass : OpSelModsMatchClass;
1221 def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
// SDWA variant of the FP input-modifier match class.
1223 class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1224 let Name = "SDWAWithFP"#opSize#"InputMods";
1225 let ParserMethod = "parseRegOrImmWithFPInputMods";
1226 let PredicateMethod = "isSDWAFP"#opSize#"Operand";
1229 def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
1230 def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
1232 class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
1233 InputMods <matchClass> {
1234 let PrintMethod = "printOperandAndFPInputMods";
1237 def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
1238 def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
// VGPR-only source with FP input modifiers (used by DPP, where sources
// must be VGPRs).
1240 def FPVRegInputModsMatchClass : AsmOperandClass {
1241 let Name = "VRegWithFPInputMods";
1242 let ParserMethod = "parseRegWithFPInputMods";
1243 let PredicateMethod = "isVReg32";
1246 def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
1247 let PrintMethod = "printOperandAndFPInputMods";
// SDWA variant of the integer input-modifier match class.
1250 class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1251 let Name = "SDWAWithInt"#opSize#"InputMods";
1252 let ParserMethod = "parseRegOrImmWithIntInputMods";
1253 let PredicateMethod = "isSDWAInt"#opSize#"Operand";
1256 def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
1257 def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
1259 class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
1260 InputMods <matchClass> {
1261 let PrintMethod = "printOperandAndIntInputMods";
1264 def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
1265 def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
// VGPR-only source with integer input modifiers (DPP sources).
1267 def IntVRegInputModsMatchClass : AsmOperandClass {
1268 let Name = "VRegWithIntInputMods";
1269 let ParserMethod = "parseRegWithIntInputMods";
1270 let PredicateMethod = "isVReg32";
1273 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
1274 let PrintMethod = "printOperandAndIntInputMods";
// Packed (VOP3P) modifier match classes. The dedicated predicate
// methods are commented out: packed operands currently parse as plain
// reg-or-imm and modifiers are folded in elsewhere.
1277 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
1278 let Name = "PackedFP"#opSize#"InputMods";
1279 let ParserMethod = "parseRegOrImm";
1280 let PredicateMethod = "isRegOrImm";
1281 // let PredicateMethod = "isPackedFP"#opSize#"InputMods";
1284 class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
1285 let Name = "PackedInt"#opSize#"InputMods";
1286 let ParserMethod = "parseRegOrImm";
1287 let PredicateMethod = "isRegOrImm";
1288 // let PredicateMethod = "isPackedInt"#opSize#"InputMods";
1291 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
1292 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1294 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1295 // let PrintMethod = "printPackedFPInputMods";
1298 class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
1299 //let PrintMethod = "printPackedIntInputMods";
1302 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
1303 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1305 //===----------------------------------------------------------------------===//
// Complex Patterns
1307 //===----------------------------------------------------------------------===//
// ComplexPatterns select an address/operand expression into N output
// operands via the named C++ selector in AMDGPUISelDAGToDAG.
1309 def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
1310 def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
1312 def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
1314 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
1315 def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
1316 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
1317 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
1318 // VOP3Mods, but the input source is known to never be NaN.
1319 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
1321 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
1323 def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
1325 def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
1327 def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
1329 def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
1332 def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">;
1334 //===----------------------------------------------------------------------===//
1335 // SI assembler operands
1336 //===----------------------------------------------------------------------===//
1341 int FLAT_SCR = 0x68;
1344 // This should be kept in sync with SISrcMods enum
1368 int LLVM_DEBUG_TRAP = 3;
1384 int FLAT_SCR_LO = 20;
1385 int FLAT_SCR_HI = 21;
1386 int XNACK_MASK = 22;
1387 int POPS_PACKER = 25;
1388 int SHADER_CYCLES = 29;
// Builds the s_getreg/s_setreg hwreg immediate from (register id,
// offset, size): offset occupies bits [10:6] and (size - 1) occupies
// bits [15:11]. NOTE(review): the first line of the !or expression is
// not visible in this chunk; the id field presumably lands in bits
// [5:0] — confirm against the original file.
1391 class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
1393 !or(!shl(Offset, 6),
1394 !shl(!add(Size, -1), 11)));
1397 //===----------------------------------------------------------------------===//
1399 // SI Instruction multiclass helpers.
1401 // Instructions with _32 take 32-bit operands.
1402 // Instructions with _64 take 64-bit operands.
1404 // VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
1405 // encoding is the standard encoding, but instruction that make use of
1406 // any of the instruction modifiers must use the 64-bit encoding.
1408 // Instructions with _e32 use the 32-bit encoding.
1409 // Instructions with _e64 use the 64-bit encoding.
1411 //===----------------------------------------------------------------------===//
// Mixin that ties a real (per-subtarget) instruction back to its
// pseudo by name, for MCInst lowering lookup.
1413 class SIMCInstr <string pseudo, int subtarget> {
1414 string PseudoInstr = pseudo;
1415 int Subtarget = subtarget;
// Shared body for the EXP (export) instruction; "done" selects the
// final-export form and appends " done" to the asm string.
1422 class EXP_Helper<bit done> : EXPCommon<
1425 ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
1426 exp_vm:$vm, exp_compr:$compr, i32imm:$en),
1427 "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
1428 let AsmMatchConverter = "cvtExp";
1431 // Split EXP instruction into EXP and EXP_DONE so we can set
1432 // mayLoad for done=1.
1433 multiclass EXP_m<bit done> {
1434 let mayLoad = done, DisableWQM = 1 in {
// Subtarget-independent pseudo; lowered to one of the real encodings
// below via the SIMCInstr pseudo-name mapping.
1435 let isPseudo = 1, isCodeGenOnly = 1 in {
1436 def "" : EXP_Helper<done>,
1437 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
// Real encodings, one per encoding family (SI, VI, GFX10).
1440 let done = done in {
1441 def _si : EXP_Helper<done>,
1442 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
1444 let AssemblerPredicate = isGFX6GFX7;
1445 let DecoderNamespace = "GFX6GFX7";
1446 let DisableDecoder = DisableSIDecoder;
1449 def _vi : EXP_Helper<done>,
1450 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
1452 let AssemblerPredicate = isGFX8GFX9;
1453 let DecoderNamespace = "GFX8";
1454 let DisableDecoder = DisableVIDecoder;
1457 def _gfx10 : EXP_Helper<done>,
1458 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
1460 let AssemblerPredicate = isGFX10Plus;
1461 let DecoderNamespace = "GFX10";
// NOTE(review): _gfx10 reuses DisableSIDecoder here while _si also
// uses it — confirm this is intentional in the original file.
1462 let DisableDecoder = DisableSIDecoder;
1468 //===----------------------------------------------------------------------===//
1469 // Vector ALU classes
1470 //===----------------------------------------------------------------------===//
// Counts source operands by checking which source types are `untyped`.
1472 class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
1474 !if (!eq(Src0.Value, untyped.Value), 0,
1475 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
1476 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
1480 // Returns the register class to use for the destination of VOP[123C]
1481 // instructions for the given VT.
1482 class getVALUDstForVT<ValueType VT> {
1483 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1484 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1485 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1486 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
1487 VOPDstS64orS32)))); // else VT == i1
1490 // Returns the register class to use for the destination of VOP[12C]
1491 // instructions with SDWA extension
1492 class getSDWADstForVT<ValueType VT> {
1493 RegisterOperand ret = !if(!eq(VT.Size, 1),
1494 SDWAVopcDst, // VOPC
1495 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
1498 // Returns the register class to use for source 0 of VOP[12C]
1499 // instructions for the given VT.
1500 class getVOPSrc0ForVT<ValueType VT> {
1501 bit isFP = isFloatType<VT>.ret;
// Dispatch on FP vs integer, then on size/type. NOTE(review): the
// register-operand result lines of this !if chain are not visible in
// this chunk.
1503 RegisterOperand ret =
1505 !if(!eq(VT.Size, 64),
1507 !if(!eq(VT.Value, f16.Value),
1509 !if(!eq(VT.Value, v2f16.Value),
1511 !if(!eq(VT.Value, v4f16.Value),
1518 !if(!eq(VT.Size, 64),
1520 !if(!eq(VT.Value, i16.Value),
1522 !if(!eq(VT.Value, v2i16.Value),
1531 // Returns the vreg register class to use for source operand given VT
1532 class getVregSrcForVT<ValueType VT> {
1533 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
1534 !if(!eq(VT.Size, 96), VReg_96,
1535 !if(!eq(VT.Size, 64), VReg_64,
1536 !if(!eq(VT.Size, 48), VReg_64,
// Picks the SDWA source operand by width (16 vs 32 bit) and FP-ness.
1540 class getSDWASrcForVT <ValueType VT> {
1541 bit isFP = isFloatType<VT>.ret;
1542 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
1543 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
1544 RegisterOperand ret = !if(isFP, retFlt, retInt);
1547 // Returns the register class to use for sources of VOP3 instructions for the
1549 class getVOP3SrcForVT<ValueType VT> {
1550 bit isFP = isFloatType<VT>.ret;
// NOTE(review): the register-operand result lines of this !if chain
// are not visible in this chunk; only the size/type conditions remain.
1551 RegisterOperand ret =
1552 !if(!eq(VT.Size, 128),
1554 !if(!eq(VT.Size, 64),
1558 !if(!eq(VT.Value, i1.Value),
1561 !if(!eq(VT.Value, f16.Value),
1563 !if(!eq(VT.Value, v2f16.Value),
1565 !if(!eq(VT.Value, v4f16.Value),
1571 !if(!eq(VT.Value, i16.Value),
1573 !if(!eq(VT.Value, v2i16.Value),
1584 // Float or packed int
// True for the types that may carry source modifiers (abs/neg).
1585 class isModifierType<ValueType SrcVT> {
1587 !if(!eq(SrcVT.Value, f16.Value), 1,
1588 !if(!eq(SrcVT.Value, f32.Value), 1,
1589 !if(!eq(SrcVT.Value, f64.Value), 1,
1590 !if(!eq(SrcVT.Value, v2f16.Value), 1,
1591 !if(!eq(SrcVT.Value, v2i16.Value), 1,
1595 // Return type of input modifiers operand for specified input operand
1596 class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
1597 bit isFP = isFloatType<VT>.ret;
1598 bit isPacked = isPackedType<VT>.ret;
1599 Operand ret = !if(!eq(VT.Size, 64),
1600 !if(isFP, FP64InputMods, Int64InputMods),
1602 !if(!eq(VT.Value, f16.Value),
1606 !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
// Modifier operand for op_sel-capable sources: FP16 gets real FP
// modifiers, everything else only the op_sel bits.
1610 class getOpSelMod <ValueType VT> {
1611 Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
1614 // Return type of input modifiers operand specified input operand for DPP
1615 class getSrcModExt <ValueType VT> {
1616 bit isFP = isFloatType<VT>.ret;
1617 Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
1620 // Return type of input modifiers operand specified input operand for SDWA
1621 class getSrcModSDWA <ValueType VT> {
1622 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
1623 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
1624 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
1625 Int32SDWAInputMods)));
1628 // Returns the input arguments for VOP[12C] instructions for the given SrcVT.
// 32-bit encoding: plain sources only, no modifier operands.
1629 class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
1630 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
1631 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
1635 // Returns the input arguments for VOP3 instructions for the given SrcVT.
// The nested !ifs enumerate every combination of operand count
// (0-3) x has-modifiers x has-src2-modifiers x has-omod x int-clamp,
// producing the matching (ins ...) dag for each.
1636 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
1637 RegisterOperand Src2RC, int NumSrcArgs,
1638 bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
1639 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1642 !if (!eq(NumSrcArgs, 0),
1643 // VOP1 without input operands (V_NOP, V_CLREXCP)
1646 !if (!eq(NumSrcArgs, 1),
1647 !if (!eq(HasModifiers, 1),
1648 // VOP1 with modifiers
1649 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1650 clampmod0:$clamp, omod0:$omod)
1652 // VOP1 without modifiers
1653 !if (!eq(HasIntClamp, 1),
1654 (ins Src0RC:$src0, clampmod0:$clamp),
1657 !if (!eq(NumSrcArgs, 2),
1658 !if (!eq(HasModifiers, 1),
1659 // VOP 2 with modifiers
1660 !if( !eq(HasOMod, 1),
1661 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1662 Src1Mod:$src1_modifiers, Src1RC:$src1,
1663 clampmod0:$clamp, omod0:$omod),
1664 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1665 Src1Mod:$src1_modifiers, Src1RC:$src1,
1668 // VOP2 without modifiers
1669 !if (!eq(HasIntClamp, 1),
1670 (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
1671 (ins Src0RC:$src0, Src1RC:$src1))
1674 /* NumSrcArgs == 3 */,
1675 !if (!eq(HasModifiers, 1),
1676 !if (!eq(HasSrc2Mods, 1),
1677 // VOP3 with modifiers
1678 !if (!eq(HasOMod, 1),
1679 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1680 Src1Mod:$src1_modifiers, Src1RC:$src1,
1681 Src2Mod:$src2_modifiers, Src2RC:$src2,
1682 clampmod0:$clamp, omod0:$omod),
1683 !if (!eq(HasIntClamp, 1),
1684 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1685 Src1Mod:$src1_modifiers, Src1RC:$src1,
1686 Src2Mod:$src2_modifiers, Src2RC:$src2,
1688 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1689 Src1Mod:$src1_modifiers, Src1RC:$src1,
1690 Src2Mod:$src2_modifiers, Src2RC:$src2))),
1691 // VOP3 with modifiers except src2
1692 !if (!eq(HasOMod, 1),
1693 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1694 Src1Mod:$src1_modifiers, Src1RC:$src1,
1695 Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
1696 !if (!eq(HasIntClamp, 1),
1697 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1698 Src1Mod:$src1_modifiers, Src1RC:$src1,
1699 Src2RC:$src2, clampmod0:$clamp),
1700 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1701 Src1Mod:$src1_modifiers, Src1RC:$src1,
1704 // VOP3 without modifiers
1705 !if (!eq(HasIntClamp, 1),
1706 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
1707 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
1711 /// XXX - src1 may only allow VGPRs?
1713 // The modifiers (except clamp) are dummy operands for the benefit of
1714 // printing and parsing. They defer their values to looking at the
1715 // srcN_modifiers for what to print.
// Input operand list for VOP3P (packed) instructions: 2 or 3 modified
// sources plus the packed op_sel/op_sel_hi/neg_lo/neg_hi operands.
1716 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
1717 RegisterOperand Src2RC, int NumSrcArgs,
1719 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
1720 dag ret = !if (!eq(NumSrcArgs, 2),
1722 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1723 Src1Mod:$src1_modifiers, Src1RC:$src1,
1725 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1726 neg_lo:$neg_lo, neg_hi:$neg_hi),
1727 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1728 Src1Mod:$src1_modifiers, Src1RC:$src1,
1729 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1730 neg_lo:$neg_lo, neg_hi:$neg_hi)),
1731 // else NumSrcArgs == 3
1733 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1734 Src1Mod:$src1_modifiers, Src1RC:$src1,
1735 Src2Mod:$src2_modifiers, Src2RC:$src2,
1737 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1738 neg_lo:$neg_lo, neg_hi:$neg_hi),
1739 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1740 Src1Mod:$src1_modifiers, Src1RC:$src1,
1741 Src2Mod:$src2_modifiers, Src2RC:$src2,
1742 op_sel:$op_sel, op_sel_hi:$op_sel_hi,
1743 neg_lo:$neg_lo, neg_hi:$neg_hi))
// Input operand list for non-packed VOP3 instructions that use op_sel:
// 2 or 3 modified sources. NOTE(review): several trailing operand
// lines of each (ins ...) dag are not visible in this chunk.
1747 class getInsVOP3OpSel <RegisterOperand Src0RC,
1748 RegisterOperand Src1RC,
1749 RegisterOperand Src2RC,
1755 dag ret = !if (!eq(NumSrcArgs, 2),
1757 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1758 Src1Mod:$src1_modifiers, Src1RC:$src1,
1761 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1762 Src1Mod:$src1_modifiers, Src1RC:$src1,
1764 // else NumSrcArgs == 3
1766 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1767 Src1Mod:$src1_modifiers, Src1RC:$src1,
1768 Src2Mod:$src2_modifiers, Src2RC:$src2,
1771 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1772 Src1Mod:$src1_modifiers, Src1RC:$src1,
1773 Src2Mod:$src2_modifiers, Src2RC:$src2,
// Input operand list for DPP-encoded instructions: $old (previous dst
// value), the VGPR sources, and the DPP control operands.
1778 class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1779 int NumSrcArgs, bit HasModifiers,
1780 Operand Src0Mod, Operand Src1Mod> {
1782 dag ret = !if (!eq(NumSrcArgs, 0),
1783 // VOP1 without input operands (V_NOP)
1784 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1785 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
1786 !if (!eq(NumSrcArgs, 1),
1787 !if (!eq(HasModifiers, 1),
1788 // VOP1_DPP with modifiers
1789 (ins DstRC:$old, Src0Mod:$src0_modifiers,
1790 Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1791 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1793 // VOP1_DPP without modifiers
1794 (ins DstRC:$old, Src0RC:$src0,
1795 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1796 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1798 /* NumSrcArgs == 2 */,
1799 !if (!eq(HasModifiers, 1),
1800 // VOP2_DPP with modifiers
1802 Src0Mod:$src0_modifiers, Src0RC:$src0,
1803 Src1Mod:$src1_modifiers, Src1RC:$src1,
1804 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
1805 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
1807 // VOP2_DPP without modifiers
1809 Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
1810 row_mask:$row_mask, bank_mask:$bank_mask,
1811 bound_ctrl:$bound_ctrl)
// DPP16 (GFX10): same as classic DPP plus the fetch-invalidate ($fi)
// operand appended. NOTE(review): the !con second argument line is not
// visible in this chunk.
1815 class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1816 int NumSrcArgs, bit HasModifiers,
1817 Operand Src0Mod, Operand Src1Mod> {
1818 dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
1819 HasModifiers, Src0Mod, Src1Mod>.ret,
// DPP8: uses the 8-lane $dpp8 selector and $fi instead of the classic
// dpp_ctrl/row_mask/bank_mask/bound_ctrl operands.
1823 class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
1824 int NumSrcArgs, bit HasModifiers,
1825 Operand Src0Mod, Operand Src1Mod> {
1826 dag ret = !if (!eq(NumSrcArgs, 0),
1827 // VOP1 without input operands (V_NOP)
1828 (ins dpp8:$dpp8, FI:$fi),
1829 !if (!eq(NumSrcArgs, 1),
1830 !if (!eq(HasModifiers, 1),
1831 // VOP1_DPP with modifiers
1832 (ins DstRC:$old, Src0Mod:$src0_modifiers,
1833 Src0RC:$src0, dpp8:$dpp8, FI:$fi)
1835 // VOP1_DPP without modifiers
1836 (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
1838 /* NumSrcArgs == 2 */,
1839 !if (!eq(HasModifiers, 1),
1840 // VOP2_DPP with modifiers
1842 Src0Mod:$src0_modifiers, Src0RC:$src0,
1843 Src1Mod:$src1_modifiers, Src1RC:$src1,
1846 // VOP2_DPP without modifiers
1848 Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
// Input operand list for SDWA-encoded instructions: modified sources,
// clamp (plus omod when HasSDWAOMod), and the dst/src select operands.
// VOPC (DstVT.Size == 1) omits dst_sel/dst_unused.
1854 class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
1855 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
1858 dag ret = !if(!eq(NumSrcArgs, 0),
1859 // VOP1 without input operands (V_NOP)
1861 !if(!eq(NumSrcArgs, 1),
1863 !if(!eq(HasSDWAOMod, 0),
1864 // VOP1_SDWA without omod
1865 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1867 dst_sel:$dst_sel, dst_unused:$dst_unused,
1868 src0_sel:$src0_sel),
1869 // VOP1_SDWA with omod
1870 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1871 clampmod:$clamp, omod:$omod,
1872 dst_sel:$dst_sel, dst_unused:$dst_unused,
1873 src0_sel:$src0_sel)),
1874 !if(!eq(NumSrcArgs, 2),
1875 !if(!eq(DstVT.Size, 1),
1877 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1878 Src1Mod:$src1_modifiers, Src1RC:$src1,
1879 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
1881 !if(!eq(HasSDWAOMod, 0),
1882 // VOP2_SDWA without omod
1883 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1884 Src1Mod:$src1_modifiers, Src1RC:$src1,
1886 dst_sel:$dst_sel, dst_unused:$dst_unused,
1887 src0_sel:$src0_sel, src1_sel:$src1_sel),
1888 // VOP2_SDWA with omod
1889 (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
1890 Src1Mod:$src1_modifiers, Src1RC:$src1,
1891 clampmod:$clamp, omod:$omod,
1892 dst_sel:$dst_sel, dst_unused:$dst_unused,
1893 src0_sel:$src0_sel, src1_sel:$src1_sel))),
1894 (ins)/* endif */)));
1897 // Outs for DPP and SDWA
// DPP/SDWA outputs: VOPC (1-bit dst) produces no explicit out operand.
1898 class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
1899 dag ret = !if(HasDst,
1900 !if(!eq(DstVT.Size, 1),
1901 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
1902 (outs DstRCExt:$vdst)),
// SDWA outputs: VOPC writes an explicit $sdst, others $vdst.
1907 class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
1908 dag ret = !if(HasDst,
1909 !if(!eq(DstVT.Size, 1),
1910 (outs DstRCSDWA:$sdst),
1911 (outs DstRCSDWA:$vdst)),
1915 // Returns the assembly string for the inputs and outputs of a VOP[12C]
1916 // instruction. This does not add the _e32 suffix, so it can be reused
1918 class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
1919 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1920 string src0 = ", $src0";
1921 string src1 = ", $src1";
1922 string src2 = ", $src2";
1923 string ret = !if(HasDst, dst, "") #
1924 !if(!eq(NumSrcArgs, 1), src0, "") #
1925 !if(!eq(NumSrcArgs, 2), src0#src1, "") #
1926 !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
1929 // Returns the assembly string for the inputs and outputs of a VOP3
// Without modifiers this degenerates to the VOP[12C] string (plus
// integer clamp); with modifiers each source prints via its
// $srcN_modifiers operand and clamp/omod are appended.
1931 class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
1932 bit HasOMod, ValueType DstVT = i32> {
1933 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
1934 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1935 string src1 = !if(!eq(NumSrcArgs, 1), "",
1936 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1937 " $src1_modifiers,"));
1938 string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
1939 string iclamp = !if(HasIntClamp, "$clamp", "");
1941 !if(!eq(HasModifiers, 0),
1942 getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
1943 dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
1946 // Returns the assembly string for the inputs and outputs of a VOP3P
1948 class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
1949 bit HasClamp, ValueType DstVT = i32> {
1950 string dst = " $vdst";
1951 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1952 string src1 = !if(!eq(NumSrcArgs, 1), "",
1953 !if(!eq(NumSrcArgs, 2), " $src1",
1955 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1957 string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
1958 string clamp = !if(HasClamp, "$clamp", "");
1960 // Each modifier is printed as an array of bits for each operand, so
1961 // all operands are printed as part of src0_modifiers.
1962 string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
// Assembly string for op_sel-capable VOP3: per-source, picks either
// the plain form (isrcN) or the modifier form (fsrcN) depending on
// whether that source has modifiers.
1965 class getAsmVOP3OpSel <int NumSrcArgs,
1970 string dst = " $vdst";
1972 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
1973 string isrc1 = !if(!eq(NumSrcArgs, 1), "",
1974 !if(!eq(NumSrcArgs, 2), " $src1",
1976 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
1978 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
1979 string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
1980 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
1981 " $src1_modifiers,"));
1982 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
1984 string src0 = !if(Src0HasMods, fsrc0, isrc0);
1985 string src1 = !if(Src1HasMods, fsrc1, isrc1);
1986 string src2 = !if(Src2HasMods, fsrc2, isrc2);
1988 string clamp = !if(HasClamp, "$clamp", "");
1990 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
// Assembly string for classic DPP: operand list plus the
// dpp_ctrl/row_mask/bank_mask/bound_ctrl controls.
1993 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
1994 string dst = !if(HasDst,
1995 !if(!eq(DstVT.Size, 1),
1998 ""); // use $sdst for VOPC
1999 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2000 string src1 = !if(!eq(NumSrcArgs, 1), "",
2001 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2002 " $src1_modifiers,"));
2003 string args = !if(!eq(HasModifiers, 0),
2004 getAsm32<0, NumSrcArgs, DstVT>.ret,
2006 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
// DPP16 appends the $fi (fetch-invalidate) operand.
2009 class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
2010 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
// DPP8 prints the 8-lane selector and $fi instead of the classic
// control operands.
2013 class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
2014 string dst = !if(HasDst,
2015 !if(!eq(DstVT.Size, 1),
2018 ""); // use $sdst for VOPC
2019 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
2020 string src1 = !if(!eq(NumSrcArgs, 1), "",
2021 !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
2022 " $src1_modifiers,"));
2023 string args = !if(!eq(HasModifiers, 0),
2024 getAsm32<0, NumSrcArgs, DstVT>.ret,
2026 string ret = dst#args#"$dpp8$fi";
// Assembly string for SDWA (VI-style): operands, clamp, then the
// dst_sel/dst_unused/srcN_sel selects. VOPC prints the "vcc" token
// instead of a destination operand and has no dst selects.
2029 class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
2030 string dst = !if(HasDst,
2031 !if(!eq(DstVT.Size, 1),
2032 " vcc", // use vcc token as dst for VOPC instructions
2035 string src0 = "$src0_modifiers";
2036 string src1 = "$src1_modifiers";
2037 string args = !if(!eq(NumSrcArgs, 0),
2039 !if(!eq(NumSrcArgs, 1),
2041 ", "#src0#", "#src1#"$clamp"
2044 string sdwa = !if(!eq(NumSrcArgs, 0),
2046 !if(!eq(NumSrcArgs, 1),
2047 " $dst_sel $dst_unused $src0_sel",
2048 !if(!eq(DstVT.Size, 1),
2049 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
2050 " $dst_sel $dst_unused $src0_sel $src1_sel"
2054 string ret = dst#args#sdwa;
// GFX9+ SDWA variant: additionally prints output modifiers
// (clamp/omod) ahead of the selects when HasOMod is set.
2057 class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
2058 ValueType DstVT = i32> {
2059 string dst = !if(HasDst,
2060 !if(!eq(DstVT.Size, 1),
2064 string src0 = "$src0_modifiers";
2065 string src1 = "$src1_modifiers";
2066 string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
2067 string args = !if(!eq(NumSrcArgs, 0), "",
2068 !if(!eq(NumSrcArgs, 1),
2073 string sdwa = !if(!eq(NumSrcArgs, 0), "",
2074 !if(!eq(NumSrcArgs, 1),
2075 out_mods#" $dst_sel $dst_unused $src0_sel",
2076 !if(!eq(DstVT.Size, 1),
2077 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
2078 out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
2082 string ret = dst#args#sdwa;
2086 // Function that checks if instruction supports DPP and SDWA
// Returns 0 (no extended encodings) for three-source instructions and for
// any 64-bit dst or 64-bit source operand; the visible checks cover
// NumSrcArgs, DstVT, Src0VT and Src1VT in that order.
2087 class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2088 ValueType Src1VT = i32> {
2089 bit ret = !if(!eq(NumSrcArgs, 3),
2090 0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
2091 !if(!eq(DstVT.Size, 64),
2092 0, // 64-bit dst - No DPP or SDWA for 64-bit operands
2093 !if(!eq(Src0VT.Size, 64),
2095 !if(!eq(Src1VT.Size, 64),
// Like getHasExt, but DPP additionally requires at least one source operand
// (NumSrcArgs == 0 never gets a DPP variant).
2104 class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
2105 ValueType Src1VT = i32> {
2106 bit ret = !if(!eq(NumSrcArgs, 0), 0,
2107 getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
// Logical OR of two bits: ret = 1 if either a or b is set, expressed with
// nested !if.
2110 class BitOr<bit a, bit b> {
2111 bit ret = !if(a, 1, !if(b, 1, 0));
// Logical AND of two bits: ret = 1 only when both a and b are set.
2114 class BitAnd<bit a, bit b> {
2115 bit ret = !if(a, !if(b, 1, 0), 0);
// Central operand/asm description for a VALU instruction. ArgVT is
// [DstVT, Src0VT, Src1VT, Src2VT], with `untyped` marking an absent operand.
// From the value types this class derives register classes, modifier
// operands, ins/outs dags and assembly strings for every encoding variant
// (e32, e64, DPP, DPP8, SDWA, SDWA9, VOP3P).
// NOTE(review): interior lines of this class are missing from this chunk;
// comments below describe only the visible fields.
2123 class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
2124 bit _EnableClamp = 0> {
2126 field list<ValueType> ArgVT = _ArgVT;
2127 field bit EnableF32SrcMods = _EnableF32SrcMods;
2128 field bit EnableClamp = _EnableClamp;
2130 field ValueType DstVT = ArgVT[0];
2131 field ValueType Src0VT = ArgVT[1];
2132 field ValueType Src1VT = ArgVT[2];
2133 field ValueType Src2VT = ArgVT[3];
// Register classes / operands per encoding, derived from the value types.
2134 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2135 field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
2136 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2137 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
2138 field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
2139 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2140 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2141 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2142 field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
2143 field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
2144 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
// NOTE(review): Src1SDWA is derived from Src0VT, not Src1VT. This matches
// the pattern of SDWA sources sharing an operand class, but confirm it is
// intentional before relying on it.
2145 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
// Source-modifier operands; the plain Src*Mod honor EnableF32SrcMods.
2146 field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
2147 field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
2148 field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
2149 field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
2150 field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
2151 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2152 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
// Presence bits: an operand exists iff its ValueType is not `untyped`.
2155 field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
2156 field bit HasDst32 = HasDst;
2157 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2158 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2159 field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
2160 field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
2161 field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
2163 // TODO: Modifiers logic is somewhat adhoc here, to be refined later
2164 // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
2165 // enables modifiers for i32 type.
2166 field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
2168 // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
2169 field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
2170 field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
2171 field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
2173 // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
2174 field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2175 field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2176 field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
2178 field bit HasSrc0Mods = HasModifiers;
2179 field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
2180 field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
// Clamp/omod availability, split by float vs. integer dst and packed types.
2182 field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
2183 field bit HasSDWAClamp = EmitDst;
2184 field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
2185 field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
2186 field bit HasClampLo = HasClamp;
2187 field bit HasClampHi = BitAnd<isPackedType<DstVT>.ret, HasClamp>.ret;
2188 field bit HasHigh = 0;
2190 field bit IsPacked = isPackedType<Src0VT>.ret;
2191 field bit HasOpSel = IsPacked;
2192 field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
2193 field bit HasSDWAOMod = isFloatType<DstVT>.ret;
// Which extended encodings (DPP/SDWA) exist for this profile.
2195 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2196 field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2197 field bit HasExtSDWA = HasExt;
2198 field bit HasExtSDWA9 = HasExt;
2199 field int NeedPatGen = PatGenMode.NoPattern;
2201 field bit IsMAI = 0;
2202 field bit IsDOT = 0;
2204 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2205 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2206 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
// Outs/Ins dags assembled per encoding from the classes above.
2208 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2210 // VOP3b instructions are a special case with a second explicit
2211 // output. This is manually overridden for them.
2212 field dag Outs32 = Outs;
2213 field dag Outs64 = Outs;
2214 field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
2215 field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
2216 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
2218 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2219 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2220 HasIntClamp, HasModifiers, HasSrc2Mods,
2221 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2222 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2223 NumSrcArgs, HasClamp,
2224 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2225 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2228 getOpSelMod<Src0VT>.ret,
2229 getOpSelMod<Src1VT>.ret,
2230 getOpSelMod<Src2VT>.ret>.ret;
2231 field dag InsDPP = !if(HasExtDPP,
2232 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
2233 HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
2235 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
2236 HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
2237 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
2238 Src0ModDPP, Src1ModDPP>.ret;
2239 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2240 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
// Assembly string templates per encoding.
2244 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2245 field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
2246 field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
2247 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2251 HasSrc2FloatMods>.ret;
2252 field string AsmDPP = !if(HasExtDPP,
2253 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2254 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2255 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
2256 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2257 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
// Operand tied to the DPP old-value input.
2259 field string TieRegDPP = "$old";
// Copy of profile p with extended encodings disabled. Only the SDWA9
// override is visible in this chunk; presumably the other HasExt* bits are
// cleared on the missing lines — confirm against the full file.
2262 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2266 let HasExtSDWA9 = 0;
// Copy of profile p with an explicit pattern-generation mode (defaults to
// PatGenMode.Pattern).
2269 class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
2270 let NeedPatGen = mode;
// Concrete profiles. Naming convention: VOP_<Dst>_<Src0>[_<Src1>[_<Src2>]].
// `untyped` slots mark operands that do not exist for the instruction.
// 16-bit scalar profiles:
2273 def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
2274 def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2275 def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2277 def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2278 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2279 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2280 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
// _ARITH variants opt into the clamp modifier via EnableClamp.
2281 def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;
// NOTE(review): these two lists have five elements, but VOPProfile reads
// only ArgVT[0..3]; the trailing `untyped` is ignored.
2283 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2284 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2286 def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
// Packed 16-bit profiles:
2288 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2289 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
2290 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2292 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2293 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2294 def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2295 def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2297 def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
// Profile with no operands at all.
2299 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
// 32/64-bit one- and two-source profiles:
2301 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2302 def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2303 def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2304 def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2305 def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2306 def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2307 def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2308 def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2309 def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2310 def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2311 def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
2313 def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2314 def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2315 def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2316 def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2317 def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2318 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2319 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2320 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
// _ARITH variant opts into the clamp modifier via EnableClamp.
2321 def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
2322 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2323 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
// 64-bit integer profiles:
2325 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2326 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2327 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
// Three-source profiles:
2329 def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2330 def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2331 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2332 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2333 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2334 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2335 def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2336 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2337 def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
// Packed dot-product style profiles (scalar accumulator, packed sources):
2339 def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2340 def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
// Wide vector-accumulator profiles (4/16/32-element dst equal to Src2) —
// presumably for the MAI/MFMA instructions; confirm at the use sites.
2342 def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
2343 def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
2344 def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
2345 def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
2346 def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2347 def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2348 def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
2349 def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2350 def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2351 def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
2352 def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
2353 def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
// Table row linking an instruction to its operand-commuted counterpart;
// consumed by the getCommuteOrig/getCommuteRev InstrMappings below.
2355 class Commutable_REV <string revOp, bit isOrig> {
2356 string RevOp = revOp;
2357 bit IsOrig = isOrig;
// Table row linking the returning and returnless forms of an atomic op;
// consumed by the getAtomicRetOp/getAtomicNoRetOp InstrMappings below.
2360 class AtomicNoRet <string noRetOp, bit isRet> {
2361 string NoRetOp = noRetOp;
2365 //===----------------------------------------------------------------------===//
2366 // Interpolation opcodes
2367 //===----------------------------------------------------------------------===//
// Destination operand for VINTRP instructions, printed via the custom
// printVINTRPDst asm-printer method.
2369 class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
// Encoding-less pseudo form of a VINTRP instruction (empty asm string,
// SIEncodingFamily.NONE); lowered to a _si/_vi/_gfx10 real by getMCOpcodeGen.
2371 class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2372 VINTRPCommon <outs, ins, "", pattern>,
2373 SIMCInstr<opName, SIEncodingFamily.NONE> {
2375 let isCodeGenOnly = 1;
2378 // FIXME-GFX10: WIP.
// SI-encoded real VINTRP; encodingFamily is a parameter so the same class
// can be reused for the GFX10 real (see VINTRP_m below).
2379 class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2380 string asm, int encodingFamily> :
2381 VINTRPCommon <outs, ins, asm, []>,
2383 SIMCInstr<opName, encodingFamily> {
2384 let DisableDecoder = DisableSIDecoder;
// VI (GFX8) encoded real VINTRP, decoded in the GFX8 namespace.
2387 class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2389 VINTRPCommon <outs, ins, asm, []>,
2391 SIMCInstr<opName, SIEncodingFamily.VI> {
2392 let AssemblerPredicate = VIAssemblerPredicate;
2393 let DecoderNamespace = "GFX8";
2394 let DisableDecoder = DisableVIDecoder;
2397 // FIXME-GFX10: WIP.
// Emits the pseudo plus the per-subtarget reals for one VINTRP opcode.
// GFX10 reuses VINTRP_Real_si with a different encoding family.
2398 multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2399 list<dag> pattern = []> {
2400 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2402 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2403 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2404 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2406 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2408 let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
2409 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2410 } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
2412 //===----------------------------------------------------------------------===//
2413 // Vector instruction mappings
2414 //===----------------------------------------------------------------------===//
// Each InstrMapping below makes TableGen emit a C++ lookup function: rows are
// grouped by RowFields, and the function maps the KeyCol entry of a row to
// its ValueCols entry.
2416 // Maps an opcode in e32 form to its e64 equivalent
2417 def getVOPe64 : InstrMapping {
2418 let FilterClass = "VOP";
2419 let RowFields = ["OpName"];
2420 let ColFields = ["Size", "VOP3"];
2421 let KeyCol = ["4", "0"];
2422 let ValueCols = [["8", "1"]];
2425 // Maps an opcode in e64 form to its e32 equivalent
2426 def getVOPe32 : InstrMapping {
2427 let FilterClass = "VOP";
2428 let RowFields = ["OpName"];
2429 let ColFields = ["Size", "VOP3"];
2430 let KeyCol = ["8", "1"];
2431 let ValueCols = [["4", "0"]];
2434 // Maps ordinary instructions to their SDWA counterparts
2435 def getSDWAOp : InstrMapping {
2436 let FilterClass = "VOP";
2437 let RowFields = ["OpName"];
2438 let ColFields = ["AsmVariantName"];
2439 let KeyCol = ["Default"];
2440 let ValueCols = [["SDWA"]];
2443 // Maps SDWA instructions to their ordinary counterparts
// Inverse of getSDWAOp: key and value columns are swapped.
2444 def getBasicFromSDWAOp : InstrMapping {
2445 let FilterClass = "VOP";
2446 let RowFields = ["OpName"];
2447 let ColFields = ["AsmVariantName"];
2448 let KeyCol = ["SDWA"];
2449 let ValueCols = [["Default"]];
2452 // Maps ordinary instructions to their DPP counterparts
// Keyed on AsmVariantName, same scheme as getSDWAOp.
2453 def getDPPOp32 : InstrMapping {
2454 let FilterClass = "VOP";
2455 let RowFields = ["OpName"];
2456 let ColFields = ["AsmVariantName"];
2457 let KeyCol = ["Default"];
2458 let ValueCols = [["DPP"]];
2461 // Maps a commuted opcode to its original version
// Rows come from the Commutable_REV table (see class above), keyed on RevOp.
2462 def getCommuteOrig : InstrMapping {
2463 let FilterClass = "Commutable_REV";
2464 let RowFields = ["RevOp"];
2465 let ColFields = ["IsOrig"];
2467 let ValueCols = [["1"]];
2470 // Maps an original opcode to its commuted version
2471 def getCommuteRev : InstrMapping {
2472 let FilterClass = "Commutable_REV";
2473 let RowFields = ["RevOp"];
2474 let ColFields = ["IsOrig"];
2476 let ValueCols = [["0"]];
// Maps a pseudo instruction (SIEncodingFamily.NONE) to its real MC opcode
// for each encoding family. The ValueCols order must match the
// SIEncodingFamily enum on the C++ side (see the note at the top of this
// file).
2479 def getMCOpcodeGen : InstrMapping {
2480 let FilterClass = "SIMCInstr";
2481 let RowFields = ["PseudoInstr"];
2482 let ColFields = ["Subtarget"];
2483 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2484 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2485 [!cast<string>(SIEncodingFamily.VI)],
2486 [!cast<string>(SIEncodingFamily.SDWA)],
2487 [!cast<string>(SIEncodingFamily.SDWA9)],
2488 // GFX80 encoding is added to work around a multiple matching
2489 // issue for buffer instructions with unpacked d16 data. This
2490 // does not actually change the encoding, and thus may be
2492 [!cast<string>(SIEncodingFamily.GFX80)],
2493 [!cast<string>(SIEncodingFamily.GFX9)],
2494 [!cast<string>(SIEncodingFamily.GFX10)],
2495 [!cast<string>(SIEncodingFamily.SDWA10)],
2496 [!cast<string>(SIEncodingFamily.GFX10_B)]];
2499 // Get equivalent SOPK instruction.
// Rows from the SOPKInstTable class, keyed on the base compare opcode.
2500 def getSOPKOp : InstrMapping {
2501 let FilterClass = "SOPKInstTable";
2502 let RowFields = ["BaseCmpOp"];
2503 let ColFields = ["IsSOPK"];
2505 let ValueCols = [["1"]];
// Maps a MUBUF opcode to its addr64 form.
2508 def getAddr64Inst : InstrMapping {
2509 let FilterClass = "MUBUFAddr64Table";
2510 let RowFields = ["OpName"];
2511 let ColFields = ["IsAddr64"];
2513 let ValueCols = [["1"]];
// Same table as getAddr64Inst; presumably differs only in its (not visible
// here) KeyCol, yielding a lookup that tolerates a missing addr64 form.
2516 def getIfAddr64Inst : InstrMapping {
2517 let FilterClass = "MUBUFAddr64Table";
2518 let RowFields = ["OpName"];
2519 let ColFields = ["IsAddr64"];
2521 let ValueCols = [["1"]];
// Maps an LDS-enabled MUBUF opcode to its non-LDS form.
2524 def getMUBUFNoLdsInst : InstrMapping {
2525 let FilterClass = "MUBUFLdsTable";
2526 let RowFields = ["OpName"];
2527 let ColFields = ["IsLds"];
2529 let ValueCols = [["0"]];
2532 // Maps an atomic opcode to its version with a return value.
// Rows from the AtomicNoRet table (see class above), keyed on NoRetOp.
2533 def getAtomicRetOp : InstrMapping {
2534 let FilterClass = "AtomicNoRet";
2535 let RowFields = ["NoRetOp"];
2536 let ColFields = ["IsRet"];
2538 let ValueCols = [["1"]];
2541 // Maps an atomic opcode to its returnless version.
2542 def getAtomicNoRetOp : InstrMapping {
2543 let FilterClass = "AtomicNoRet";
2544 let RowFields = ["NoRetOp"];
2545 let ColFields = ["IsRet"];
2547 let ValueCols = [["0"]];
2550 // Maps a GLOBAL to its SADDR form.
2551 def getGlobalSaddrOp : InstrMapping {
2552 let FilterClass = "GlobalSaddrTable";
2553 let RowFields = ["SaddrOp"];
2554 let ColFields = ["IsSaddr"];
2556 let ValueCols = [["1"]];
2559 // Maps a v_cmpx opcode with sdst to opcode without sdst.
2560 def getVCMPXNoSDstOp : InstrMapping {
2561 let FilterClass = "VCMPXNoSDstTable";
2562 let RowFields = ["NoSDstOp"];
2563 let ColFields = ["HasSDst"];
2565 let ValueCols = [["0"]];
2568 // Maps a SOPP to a SOPP with S_NOP
// Keyed on the asm string; the 8-byte column is the relaxed (padded) form.
2569 def getSOPPWithRelaxation : InstrMapping {
2570 let FilterClass = "Base_SOPP";
2571 let RowFields = ["AsmString"];
2572 let ColFields = ["Size"];
2574 let ValueCols = [["8"]];
2577 include "SIInstructions.td"
2579 include "DSInstructions.td"
2580 include "MIMGInstructions.td"