1 //===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// Address-selection ComplexPatterns for FLAT memory operands.  Both match
// an i64 pointer and yield 3 selected operands (used below as
// $vaddr/$offset/$slc); the negative complexity (-10) lets more specific
// addressing modes be preferred during selection.
10 def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
11 def FLATOffset : ComplexPattern<i64, 3, "SelectFlat", [], [], -10>;
13 //===----------------------------------------------------------------------===//
15 //===----------------------------------------------------------------------===//
// Base class for all FLAT pseudo instructions (flat/global/scratch memory
// ops).  Records the mnemonic and asm-operand string so the _Real encoded
// variants below can reuse them verbatim.
17 class FLAT_Pseudo<string opName, dag outs, dag ins,
18 string asmOps, list<dag> pattern=[]> :
19 InstSI<outs, ins, "", pattern>,
20 SIMCInstr<opName, SIEncodingFamily.NONE> {
23 let isCodeGenOnly = 1;
25 let SubtargetPredicate = isCIVI;
28 // Internally, FLAT instructions are executed as both an LDS and a
29 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
30 // and are not considered done until both have been decremented.
34 let UseNamedOperandTable = 1;
35 let hasSideEffects = 0;
36 let SchedRW = [WriteVMEM];
// Forwarded to the _Real encodings so pseudo and real share one asm string.
38 string Mnemonic = opName;
39 string AsmOperands = asmOps;
// Segment selectors; at most one should be set.  They drive the seg
// encoding field and the implicit-use list below.
41 bits<1> is_flat_global = 0;
42 bits<1> is_flat_scratch = 0;
49 // TODO: M0 if it could possibly access LDS (before gfx9? only)?
50 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);
// Real (encoded) FLAT instruction.  Pulls operand lists, the asm string and
// relevant flags from the pseudo `ps`, then lays out the 64-bit FLAT
// encoding in Inst{63-0}.
53 class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
54 InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
58 let isCodeGenOnly = 0;
60 // copy relevant pseudo op flags
61 let SubtargetPredicate = ps.SubtargetPredicate;
62 let AsmMatchConverter = ps.AsmMatchConverter;
63 let TSFlags = ps.TSFlags;
64 let UseNamedOperandTable = ps.UseNamedOperandTable;
74 bits<1> lds = 0; // XXX - What does this actually do?
76 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
77 bits<2> seg = !if(ps.is_flat_global, 0b10,
78 !if(ps.is_flat_scratch, 0b01, 0));
80 // Signed offset. Highest bit ignored for flat and treated as 12-bit
81 // unsigned for flat accesses.
83 bits<1> nv = 0; // XXX - What does this actually do?
85 // We don't use tfe right now, and it was removed in gfx9.
88 // Only valid on GFX9+
89 let Inst{12-0} = offset;
91 let Inst{15-14} = seg;
// glc is only encoded from the operand when the pseudo actually has one;
// otherwise the pseudo's fixed glcValue is used.
93 let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
96 let Inst{31-26} = 0x37; // Encoding.
97 let Inst{39-32} = vaddr;
98 let Inst{47-40} = !if(ps.has_data, vdata, ?);
100 let Inst{55} = nv; // nv on GFX9+, TFE before.
101 let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
// FLAT load pseudo: loads into $vdst from a 64-bit $vaddr.
// HasSignedOffset selects a signed 13-bit offset operand (global/scratch)
// instead of the unsigned 12-bit one (plain flat).
104 class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
105 bit HasSignedOffset = 0> : FLAT_Pseudo<
107 (outs regClass:$vdst),
109 (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc),
110 (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)),
111 " $vdst, $vaddr$offset$glc$slc"> {
// Global-segment load: signed 13-bit offset, is_flat_global drives the
// seg encoding and drops FLAT_SCR from the implicit uses.
116 class FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> :
117 FLAT_Load_Pseudo<opName, regClass, 1> {
118 let is_flat_global = 1;
// Scratch-segment load: signed 13-bit offset, seg = 0b01.
121 class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> :
122 FLAT_Load_Pseudo<opName, regClass, 1> {
123 let is_flat_scratch = 1;
// FLAT store pseudo: stores $vdata to a 64-bit $vaddr; no outs.
// HasSignedOffset selects the signed 13-bit vs unsigned 12-bit offset
// operand, as for loads.
126 class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
127 bit HasSignedOffset = 0> : FLAT_Pseudo<
131 (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc),
132 (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)),
133 " $vaddr, $vdata$offset$glc$slc"> {
// Global-segment store counterpart of FLAT_Global_Load_Pseudo.
139 class FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> :
140 FLAT_Store_Pseudo<opName, regClass, 1> {
141 let is_flat_global = 1;
// Scratch-segment store counterpart of FLAT_Scratch_Load_Pseudo.
144 class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> :
145 FLAT_Store_Pseudo<opName, regClass, 1> {
146 let is_flat_scratch = 1;
// Emits two pseudos per atomic operation:
//   ""   - no-return form: no pattern, AtomicNoRet<NAME, 0>.
//   _RTN - returning form: defines $vdst, carries the selection pattern,
//          and hardwires " glc" into the asm string.
// data_vt/data_rc default to the result type/class but differ for
// cmpswap-style ops that pass a compare+swap pair as data.
149 multiclass FLAT_Atomic_Pseudo<
151 RegisterClass vdst_rc,
153 SDPatternOperator atomic = null_frag,
154 ValueType data_vt = vt,
155 RegisterClass data_rc = vdst_rc,
156 bit HasSignedOffset = 0> {
158 def "" : FLAT_Pseudo <opName,
161 (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
162 (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
163 " $vaddr, $vdata$offset$slc",
165 AtomicNoRet <NAME, 0> {
171 let PseudoInstr = NAME;
174 def _RTN : FLAT_Pseudo <opName,
175 (outs vdst_rc:$vdst),
177 (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
178 (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
179 " $vdst, $vaddr, $vdata$offset glc$slc",
181 (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
182 AtomicNoRet <NAME, 1> {
// Post-isel hook lets the target fix up the selected atomic (see
// AtomicNoRet machinery) after instruction selection.
185 let hasPostISelHook = 1;
188 let PseudoInstr = NAME # "_RTN";
// Wraps a binary atomic SDNode in a PatFrag that only matches when the
// memory operand lives in the FLAT address space.
192 class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
193 (ops node:$ptr, node:$value),
194 (atomic_op node:$ptr, node:$value),
195 [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
// FLAT-address-space fragments for each supported binary atomic op.
198 def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
199 def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
200 def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
201 def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
202 def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
203 def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
204 def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
205 def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
206 def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
207 def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
208 def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
209 def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
210 def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
214 //===----------------------------------------------------------------------===//
216 //===----------------------------------------------------------------------===//
// FLAT load/store pseudo instructions, one per access width
// (byte/short with sign/zero extension, dword, and x2/x3/x4 vectors).
218 def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
219 def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
220 def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
221 def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
222 def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
223 def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
224 def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
225 def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
227 def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
228 def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
229 def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
230 def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
231 def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
232 def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
// FLAT atomic pseudo definitions (32-bit forms and _X2 64-bit forms).
// The cmpswap variants pass the compare+swap pair via a wider data class
// than the result register class.
234 defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
235 VGPR_32, i32, atomic_cmp_swap_flat,
238 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
239 VReg_64, i64, atomic_cmp_swap_flat,
242 defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
243 VGPR_32, i32, atomic_swap_flat>;
245 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
246 VReg_64, i64, atomic_swap_flat>;
248 defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
249 VGPR_32, i32, atomic_add_flat>;
251 defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
252 VGPR_32, i32, atomic_sub_flat>;
254 defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
255 VGPR_32, i32, atomic_min_flat>;
257 defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
258 VGPR_32, i32, atomic_umin_flat>;
260 defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
261 VGPR_32, i32, atomic_max_flat>;
263 defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
264 VGPR_32, i32, atomic_umax_flat>;
266 defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
267 VGPR_32, i32, atomic_and_flat>;
269 defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
270 VGPR_32, i32, atomic_or_flat>;
272 defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
273 VGPR_32, i32, atomic_xor_flat>;
275 defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
276 VGPR_32, i32, atomic_inc_flat>;
278 defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
279 VGPR_32, i32, atomic_dec_flat>;
281 defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
282 VReg_64, i64, atomic_add_flat>;
284 defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
285 VReg_64, i64, atomic_sub_flat>;
287 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
288 VReg_64, i64, atomic_min_flat>;
290 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
291 VReg_64, i64, atomic_umin_flat>;
293 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
294 VReg_64, i64, atomic_max_flat>;
296 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
297 VReg_64, i64, atomic_umax_flat>;
299 defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
300 VReg_64, i64, atomic_and_flat>;
302 defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
303 VReg_64, i64, atomic_or_flat>;
305 defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
306 VReg_64, i64, atomic_xor_flat>;
308 defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
309 VReg_64, i64, atomic_inc_flat>;
311 defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
312 VReg_64, i64, atomic_dec_flat>;
// Floating-point FLAT atomics; no selection patterns (null_frag), so they
// are only reachable via the assembler.
314 let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?
316 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
317 VGPR_32, f32, null_frag, v2f32, VReg_64>;
319 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
320 VReg_64, f64, null_frag, v2f64, VReg_128>;
322 defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
325 defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
328 defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
331 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
334 } // End SubtargetPredicate = isCI
// Global-segment (is_flat_global) variants of the FLAT loads/stores,
// gated on subtargets that have the global_* instructions.
336 let SubtargetPredicate = HasFlatGlobalInsts in {
337 def GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
338 def GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
339 def GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
340 def GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
341 def GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
342 def GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
343 def GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
344 def GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
346 def GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
347 def GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
348 def GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
349 def GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
350 def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
351 def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
353 } // End SubtargetPredicate = HasFlatGlobalInsts
356 //===----------------------------------------------------------------------===//
358 //===----------------------------------------------------------------------===//
// Load PatFrag that matches `ld` when the access is in the flat, global
// or constant address space (all reachable through FLAT loads).
360 class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
362 auto const AS = cast<MemSDNode>(N)->getAddressSpace();
363 return AS == AMDGPUASI.FLAT_ADDRESS ||
364 AS == AMDGPUASI.GLOBAL_ADDRESS ||
365 AS == AMDGPUASI.CONSTANT_ADDRESS;
// Store PatFrag that matches `st` for the flat or global address space
// (constant is excluded: it is not writable).
368 class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
369 (st node:$val, node:$ptr), [{
370 auto const AS = cast<MemSDNode>(N)->getAddressSpace();
371 return AS == AMDGPUASI.FLAT_ADDRESS ||
372 AS == AMDGPUASI.GLOBAL_ADDRESS;
// Concrete flat load/store fragments used by the patterns below.
375 def atomic_flat_load : flat_ld <atomic_load>;
376 def flat_load : flat_ld <load>;
377 def flat_az_extloadi8 : flat_ld <az_extloadi8>;
378 def flat_sextloadi8 : flat_ld <sextloadi8>;
379 def flat_az_extloadi16 : flat_ld <az_extloadi16>;
380 def flat_sextloadi16 : flat_ld <sextloadi16>;
382 def atomic_flat_store : flat_st <atomic_store>;
383 def flat_store : flat_st <store>;
384 def flat_truncstorei8 : flat_st <truncstorei8>;
385 def flat_truncstorei16 : flat_st <truncstorei16>;
387 // Patterns for global loads with no offset.
// Non-atomic load: selects `inst` with glc = 0.
388 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
389 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
390 (inst $vaddr, $offset, 0, $slc)
// Atomic load: same as FlatLoadPat but forces glc = 1.
393 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
394 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
395 (inst $vaddr, $offset, 1, $slc)
// Non-atomic store: data comes first in the DAG node, glc = 0.
398 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
399 (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
400 (inst $vaddr, $data, $offset, 0, $slc)
// Atomic store: glc = 1.
403 class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
404 // atomic store follows atomic binop convention so the address comes first.
406 (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
407 (inst $vaddr, $data, $offset, 1, $slc)
// Returning read-modify-write atomic; data_vt may differ from the result
// type vt (e.g. cmpswap passes a value pair).
410 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
411 ValueType data_vt = vt> : Pat <
412 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
413 (inst $vaddr, $data, $offset, $slc)
// Selection patterns mapping generic loads/stores/atomics onto the FLAT
// pseudos, for subtargets that have FLAT (CI and VI).  Atomics always
// select the _RTN form.
416 let Predicates = [isCIVI] in {
418 def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
419 def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
420 def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i16>;
421 def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i16>;
422 def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
423 def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
424 def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
425 def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
426 def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
428 def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
429 def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
431 def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
432 def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
433 def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
434 def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
435 def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
437 def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
438 def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
440 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
441 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
442 def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
443 def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
444 def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
445 def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
446 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
447 def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
448 def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
449 def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
450 def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
451 def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
452 def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
454 def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
455 def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
456 def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
457 def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
458 def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
459 def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
460 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
461 def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
462 def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
463 def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
464 def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
465 def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
466 def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
468 } // End Predicates = [isCIVI]
// 16-bit store patterns, VI only (16-bit types are legal from VI on).
470 let Predicates = [isVI] in {
471 def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
472 def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
476 //===----------------------------------------------------------------------===//
478 //===----------------------------------------------------------------------===//
480 //===----------------------------------------------------------------------===//
482 //===----------------------------------------------------------------------===//
// CI real encoding.  Note it registers under SIEncodingFamily.SI (CI
// shares that MC encoding family) but assembles only for CI.
484 class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
486 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
487 let AssemblerPredicate = isCIOnly;
488 let DecoderNamespace="CI";
// CI real load/store definitions with their CI opcodes.
491 def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
492 def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
493 def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
494 def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
495 def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
496 def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
497 def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
498 def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;
500 def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
501 def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
502 def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
503 def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
504 def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
505 def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
// Expands an atomic pseudo pair (no-return and _RTN) into their CI real
// encodings; both variants share the same opcode.
507 multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
508 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
509 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
// CI real atomic definitions (32-bit at 0x30+, _X2 at 0x50+).
512 defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
513 defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
514 defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
515 defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
516 defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
517 defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
518 defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
519 defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
520 defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
521 defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
522 defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
523 defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
524 defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
525 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
526 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
527 defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
528 defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
529 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
530 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
531 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
532 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
533 defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
534 defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
535 defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
536 defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
537 defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
539 // CI Only flat instructions
540 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
541 defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
542 defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
543 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
544 defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
545 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
548 //===----------------------------------------------------------------------===//
550 //===----------------------------------------------------------------------===//
// VI real encoding, analogous to FLAT_Real_ci but in the VI encoding
// family and decoder namespace.
552 class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
554 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
555 let AssemblerPredicate = isVI;
556 let DecoderNamespace="VI";
// VI real load/store definitions.  Note the opcode renumbering versus CI
// (loads start at 0x10, and DWORDX4/DWORDX3 swap relative to list order).
559 def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
560 def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
561 def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
562 def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
563 def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
564 def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
565 def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
566 def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
568 def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
569 def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
570 def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
571 def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
572 def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
573 def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
// Expands an atomic pseudo pair (no-return and _RTN) into their VI real
// encodings; both variants share the same opcode.
575 multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
576 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
577 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
// VI real atomic definitions (32-bit at 0x40+, _X2 at 0x60+).
580 defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
581 defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
582 defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
583 defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
584 defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
585 defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
586 defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
587 defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
588 defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
589 defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
590 defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
591 defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
592 defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
593 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
594 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
595 defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
596 defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
597 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
598 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
599 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
600 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
601 defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
602 defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
603 defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
604 defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
605 defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
// GLOBAL_* real encodings reuse the FLAT_* opcodes; the seg field encoded
// from is_flat_global (see FLAT_Real) distinguishes them.
607 def GLOBAL_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, GLOBAL_LOAD_UBYTE>;
608 def GLOBAL_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, GLOBAL_LOAD_SBYTE>;
609 def GLOBAL_LOAD_USHORT_vi : FLAT_Real_vi <0x12, GLOBAL_LOAD_USHORT>;
610 def GLOBAL_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, GLOBAL_LOAD_SSHORT>;
611 def GLOBAL_LOAD_DWORD_vi : FLAT_Real_vi <0x14, GLOBAL_LOAD_DWORD>;
612 def GLOBAL_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, GLOBAL_LOAD_DWORDX2>;
613 def GLOBAL_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, GLOBAL_LOAD_DWORDX4>;
614 def GLOBAL_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, GLOBAL_LOAD_DWORDX3>;
616 def GLOBAL_STORE_BYTE_vi : FLAT_Real_vi <0x18, GLOBAL_STORE_BYTE>;
617 def GLOBAL_STORE_SHORT_vi : FLAT_Real_vi <0x1a, GLOBAL_STORE_SHORT>;
618 def GLOBAL_STORE_DWORD_vi : FLAT_Real_vi <0x1c, GLOBAL_STORE_DWORD>;
619 def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, GLOBAL_STORE_DWORDX2>;
620 def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, GLOBAL_STORE_DWORDX4>;
621 def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, GLOBAL_STORE_DWORDX3>;