1 //===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
10 def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
12 def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
13 def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
15 //===----------------------------------------------------------------------===//
17 //===----------------------------------------------------------------------===//
19 class FLAT_Pseudo<string opName, dag outs, dag ins,
20 string asmOps, list<dag> pattern=[]> :
21 InstSI<outs, ins, "", pattern>,
22 SIMCInstr<opName, SIEncodingFamily.NONE> {
25 let isCodeGenOnly = 1;
29 let UseNamedOperandTable = 1;
30 let hasSideEffects = 0;
31 let SchedRW = [WriteVMEM];
33 string Mnemonic = opName;
34 string AsmOperands = asmOps;
36 bits<1> is_flat_global = 0;
37 bits<1> is_flat_scratch = 0;
41 // We need to distinguish having saddr and enabling saddr because
42 // saddr is only valid for scratch and global instructions. Pre-gfx9
43 // these bits were reserved, so we also don't necessarily want to
44 // set these bits to the disabled value for the original flat
45 // segment instructions.
46 bits<1> has_saddr = 0;
47 bits<1> enabled_saddr = 0;
48 bits<7> saddr_value = 0;
49 bits<1> has_vaddr = 1;
57 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
58 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
60 // TODO: M0 if it could possibly access LDS (before gfx9? only)?
61 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);
63 // Internally, FLAT instruction are executed as both an LDS and a
64 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
65 // and are not considered done until both have been decremented.
67 let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
69 let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
72 class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
73 InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
77 let isCodeGenOnly = 0;
79 // copy relevant pseudo op flags
80 let SubtargetPredicate = ps.SubtargetPredicate;
81 let AsmMatchConverter = ps.AsmMatchConverter;
82 let TSFlags = ps.TSFlags;
83 let UseNamedOperandTable = ps.UseNamedOperandTable;
96 bits<1> lds = 0; // XXX - What does this actually do?
98 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
99 bits<2> seg = !if(ps.is_flat_global, 0b10,
100 !if(ps.is_flat_scratch, 0b01, 0));
102 // Signed offset. Highest bit ignored for flat and treated as 12-bit
103 // unsigned for flat accesses.
105 bits<1> nv = 0; // XXX - What does this actually do?
107 // We don't use tfe right now, and it was removed in gfx9.
110 // Only valid on GFX9+
111 let Inst{12-0} = offset;
113 let Inst{15-14} = seg;
115 let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
117 let Inst{24-18} = op;
118 let Inst{31-26} = 0x37; // Encoding.
119 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
120 let Inst{47-40} = !if(ps.has_data, vdata, ?);
121 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
123 // 54-48 is reserved.
124 let Inst{55} = nv; // nv on GFX9+, TFE before.
125 let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
128 class GlobalSaddrTable <bit is_saddr, string Name = ""> {
129 bit IsSaddr = is_saddr;
130 string SaddrOp = Name;
133 // TODO: Is exec allowed for saddr? The disabled value 0x7f is the
134 // same encoding value as exec_hi, so it isn't possible to use that if
135 // saddr is 32-bit (which isn't handled here yet).
136 class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
137 bit HasTiedOutput = 0,
138 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
140 (outs regClass:$vdst),
143 !con((ins VReg_64:$vaddr),
144 !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
145 (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
146 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
147 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
150 let has_saddr = HasSaddr;
151 let enabled_saddr = EnableSaddr;
152 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
155 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
156 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
159 class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
160 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
164 !con((ins VReg_64:$vaddr, vdataClass:$vdata),
165 !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
166 (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
167 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
171 let has_saddr = HasSaddr;
172 let enabled_saddr = EnableSaddr;
173 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
177 multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
178 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
179 def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
180 GlobalSaddrTable<0, opName>;
181 def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
182 GlobalSaddrTable<1, opName>;
186 class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
187 bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
189 (outs regClass:$vdst),
190 !con((ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
191 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
192 " $vdst, $saddr$offset$glc$slc$dlc"> {
193 let is_flat_global = 1;
198 let enabled_saddr = 1;
201 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
202 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
205 multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
206 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
207 def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
208 GlobalSaddrTable<0, opName>;
209 def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
210 GlobalSaddrTable<1, opName>;
214 class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
215 bit HasSignedOffset = 0> : FLAT_Pseudo<
219 (ins vdataClass:$vdata, SReg_64:$saddr),
220 (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
221 " $vdata, $saddr$offset$glc$slc$dlc"> {
222 let is_flat_global = 1;
228 let enabled_saddr = 1;
232 class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
233 bit EnableSaddr = 0>: FLAT_Pseudo<
235 (outs regClass:$vdst),
237 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
238 (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
239 " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
243 let enabled_saddr = EnableSaddr;
244 let has_vaddr = !if(EnableSaddr, 0, 1);
245 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
249 class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
253 (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
254 (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
255 " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
260 let enabled_saddr = EnableSaddr;
261 let has_vaddr = !if(EnableSaddr, 0, 1);
262 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
266 multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
267 let is_flat_scratch = 1 in {
268 def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
269 def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
273 multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
274 let is_flat_scratch = 1 in {
275 def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
276 def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
280 class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
281 string asm, list<dag> pattern = []> :
282 FLAT_Pseudo<opName, outs, ins, asm, pattern> {
293 class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
294 string asm, list<dag> pattern = []>
295 : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
296 let hasPostISelHook = 1;
300 let PseudoInstr = NAME # "_RTN";
303 multiclass FLAT_Atomic_Pseudo<
305 RegisterClass vdst_rc,
307 SDPatternOperator atomic = null_frag,
308 ValueType data_vt = vt,
309 RegisterClass data_rc = vdst_rc,
310 bit isFP = isFloatType<data_vt>.ret> {
311 def "" : FLAT_AtomicNoRet_Pseudo <opName,
313 (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
314 " $vaddr, $vdata$offset$slc">,
315 GlobalSaddrTable<0, opName>,
316 AtomicNoRet <opName, 0> {
317 let PseudoInstr = NAME;
319 let AddedComplexity = -1; // Prefer global atomics if available
322 def _RTN : FLAT_AtomicRet_Pseudo <opName,
323 (outs vdst_rc:$vdst),
324 (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
325 " $vdst, $vaddr, $vdata$offset glc$slc",
327 (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
328 GlobalSaddrTable<0, opName#"_rtn">,
329 AtomicNoRet <opName, 1>{
331 let AddedComplexity = -1; // Prefer global atomics if available
335 multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
337 RegisterClass vdst_rc,
339 SDPatternOperator atomic = null_frag,
340 ValueType data_vt = vt,
341 RegisterClass data_rc = vdst_rc,
342 bit isFP = isFloatType<data_vt>.ret> {
344 def "" : FLAT_AtomicNoRet_Pseudo <opName,
346 (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
347 " $vaddr, $vdata, off$offset$slc">,
348 GlobalSaddrTable<0, opName>,
349 AtomicNoRet <opName, 0> {
351 let PseudoInstr = NAME;
355 def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
357 (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
358 " $vaddr, $vdata, $saddr$offset$slc">,
359 GlobalSaddrTable<1, opName>,
360 AtomicNoRet <opName#"_saddr", 0> {
362 let enabled_saddr = 1;
363 let PseudoInstr = NAME#"_SADDR";
368 multiclass FLAT_Global_Atomic_Pseudo_RTN<
370 RegisterClass vdst_rc,
372 SDPatternOperator atomic = null_frag,
373 ValueType data_vt = vt,
374 RegisterClass data_rc = vdst_rc,
375 bit isFP = isFloatType<data_vt>.ret> {
377 def _RTN : FLAT_AtomicRet_Pseudo <opName,
378 (outs vdst_rc:$vdst),
379 (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
380 " $vdst, $vaddr, $vdata, off$offset glc$slc",
382 (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
383 GlobalSaddrTable<0, opName#"_rtn">,
384 AtomicNoRet <opName, 1> {
389 def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
390 (outs vdst_rc:$vdst),
391 (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
392 " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
393 GlobalSaddrTable<1, opName#"_rtn">,
394 AtomicNoRet <opName#"_saddr", 1> {
396 let enabled_saddr = 1;
397 let PseudoInstr = NAME#"_SADDR_RTN";
402 multiclass FLAT_Global_Atomic_Pseudo<
404 RegisterClass vdst_rc,
406 SDPatternOperator atomic_rtn = null_frag,
407 SDPatternOperator atomic_no_rtn = null_frag,
408 ValueType data_vt = vt,
409 RegisterClass data_rc = vdst_rc> {
410 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
411 defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
412 defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
416 //===----------------------------------------------------------------------===//
418 //===----------------------------------------------------------------------===//
420 def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
421 def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
422 def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
423 def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
424 def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
425 def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
426 def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
427 def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
429 def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
430 def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
431 def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
432 def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
433 def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
434 def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
436 let SubtargetPredicate = HasD16LoadStore in {
437 def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
438 def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
439 def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
440 def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
441 def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
442 def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
444 def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
445 def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
448 defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
449 VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
452 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
453 VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
456 defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
457 VGPR_32, i32, atomic_swap_flat_32>;
459 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
460 VReg_64, i64, atomic_swap_flat_64>;
462 defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
463 VGPR_32, i32, atomic_load_add_flat_32>;
465 defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
466 VGPR_32, i32, atomic_load_sub_flat_32>;
468 defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
469 VGPR_32, i32, atomic_load_min_flat_32>;
471 defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
472 VGPR_32, i32, atomic_load_umin_flat_32>;
474 defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
475 VGPR_32, i32, atomic_load_max_flat_32>;
477 defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
478 VGPR_32, i32, atomic_load_umax_flat_32>;
480 defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
481 VGPR_32, i32, atomic_load_and_flat_32>;
483 defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
484 VGPR_32, i32, atomic_load_or_flat_32>;
486 defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
487 VGPR_32, i32, atomic_load_xor_flat_32>;
489 defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
490 VGPR_32, i32, atomic_inc_flat_32>;
492 defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
493 VGPR_32, i32, atomic_dec_flat_32>;
495 defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
496 VReg_64, i64, atomic_load_add_flat_64>;
498 defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
499 VReg_64, i64, atomic_load_sub_flat_64>;
501 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
502 VReg_64, i64, atomic_load_min_flat_64>;
504 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
505 VReg_64, i64, atomic_load_umin_flat_64>;
507 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
508 VReg_64, i64, atomic_load_max_flat_64>;
510 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
511 VReg_64, i64, atomic_load_umax_flat_64>;
513 defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
514 VReg_64, i64, atomic_load_and_flat_64>;
516 defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
517 VReg_64, i64, atomic_load_or_flat_64>;
519 defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
520 VReg_64, i64, atomic_load_xor_flat_64>;
522 defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
523 VReg_64, i64, atomic_inc_flat_64>;
525 defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
526 VReg_64, i64, atomic_dec_flat_64>;
528 // GFX7-, GFX10-only flat instructions.
529 let SubtargetPredicate = isGFX7GFX10 in {
531 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
532 VGPR_32, f32, null_frag, v2f32, VReg_64>;
534 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
535 VReg_64, f64, null_frag, v2f64, VReg_128>;
537 defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
540 defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
543 defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
546 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
549 } // End SubtargetPredicate = isGFX7GFX10
551 defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
552 defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
553 defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
554 defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
555 defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
556 defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
557 defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
558 defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
560 defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
561 defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
562 defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
563 defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
564 defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
565 defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
566 let OtherPredicates = [HasGFX10_BEncoding] in
567 def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
569 defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
570 defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
571 defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
572 defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
573 defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
574 defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
575 let OtherPredicates = [HasGFX10_BEncoding] in
576 def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
578 defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
579 defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
581 let is_flat_global = 1 in {
582 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
583 VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag,
586 defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
587 VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
591 defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
592 VGPR_32, i32, atomic_swap_global_32>;
594 defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
595 VReg_64, i64, atomic_swap_global_64>;
597 defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
598 VGPR_32, i32, atomic_load_add_global_32>;
600 defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
601 VGPR_32, i32, atomic_load_sub_global_32>;
603 defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
604 VGPR_32, i32, atomic_load_min_global_32>;
606 defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
607 VGPR_32, i32, atomic_load_umin_global_32>;
609 defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
610 VGPR_32, i32, atomic_load_max_global_32>;
612 defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
613 VGPR_32, i32, atomic_load_umax_global_32>;
615 defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
616 VGPR_32, i32, atomic_load_and_global_32>;
618 defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
619 VGPR_32, i32, atomic_load_or_global_32>;
621 defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
622 VGPR_32, i32, atomic_load_xor_global_32>;
624 defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
625 VGPR_32, i32, atomic_inc_global_32>;
627 defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
628 VGPR_32, i32, atomic_dec_global_32>;
630 defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
631 VReg_64, i64, atomic_load_add_global_64>;
633 defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
634 VReg_64, i64, atomic_load_sub_global_64>;
636 defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
637 VReg_64, i64, atomic_load_min_global_64>;
639 defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
640 VReg_64, i64, atomic_load_umin_global_64>;
642 defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
643 VReg_64, i64, atomic_load_max_global_64>;
645 defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
646 VReg_64, i64, atomic_load_umax_global_64>;
648 defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
649 VReg_64, i64, atomic_load_and_global_64>;
651 defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
652 VReg_64, i64, atomic_load_or_global_64>;
654 defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
655 VReg_64, i64, atomic_load_xor_global_64>;
657 defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
658 VReg_64, i64, atomic_inc_global_64>;
660 defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
661 VReg_64, i64, atomic_dec_global_64>;
663 let SubtargetPredicate = HasGFX10_BEncoding in
664 defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
665 VGPR_32, i32, atomic_csub_global_32>;
666 } // End is_flat_global = 1
670 let SubtargetPredicate = HasFlatScratchInsts in {
671 defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
672 defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
673 defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
674 defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
675 defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
676 defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
677 defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
678 defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
680 defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
681 defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
682 defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
683 defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
684 defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
685 defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;
687 defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
688 defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
689 defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
690 defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
691 defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
692 defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
694 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
695 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
697 } // End SubtargetPredicate = HasFlatScratchInsts
699 let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
700 defm GLOBAL_ATOMIC_FCMPSWAP :
701 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
702 defm GLOBAL_ATOMIC_FMIN :
703 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
704 defm GLOBAL_ATOMIC_FMAX :
705 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
706 defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
707 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
708 defm GLOBAL_ATOMIC_FMIN_X2 :
709 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
710 defm GLOBAL_ATOMIC_FMAX_X2 :
711 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
712 } // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
714 let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
716 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
717 "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
719 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
720 "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
723 } // End SubtargetPredicate = HasAtomicFaddInsts
725 //===----------------------------------------------------------------------===//
727 //===----------------------------------------------------------------------===//
729 // Patterns for global loads with no offset.
730 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
731 (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
732 (inst $vaddr, $offset, 0, 0, $slc)
735 class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
736 (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
737 (inst $vaddr, $offset, 0, 0, $slc, $in)
740 class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
741 (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
742 (inst $vaddr, $offset, 0, 0, $slc, $in)
745 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
746 (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
747 (inst $vaddr, $offset, 0, 0, $slc)
750 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
751 (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
752 (inst $vaddr, $offset, 0, 0, $slc)
755 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
756 (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
757 (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
760 class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
761 (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
762 (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
765 class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
766 // atomic store follows atomic binop convention so the address comes
768 (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
769 (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
772 class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
773 // atomic store follows atomic binop convention so the address comes
775 (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
776 (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
779 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
780 ValueType data_vt = vt> : GCNPat <
781 (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
782 (inst $vaddr, $data, $offset, $slc)
785 class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
786 (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
787 (inst $vaddr, $data, $offset, $slc)
790 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
791 ValueType data_vt = vt> : GCNPat <
792 (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
793 (inst $vaddr, $data, $offset, $slc)
796 let OtherPredicates = [HasFlatAddressSpace] in {
798 def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
799 def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
800 def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
801 def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
802 def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
803 def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
804 def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
805 def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
806 def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
807 def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
808 def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
810 def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
811 def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
813 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
814 def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
816 foreach vt = Reg32Types.types in {
817 def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
818 def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
821 foreach vt = VReg_64.RegTypes in {
822 def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
823 def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
826 def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
828 foreach vt = VReg_128.RegTypes in {
829 def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
830 def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
833 def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
834 def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
836 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
837 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
838 def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
839 def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
840 def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
841 def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
842 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
843 def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
844 def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
845 def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
846 def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
847 def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
848 def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
850 def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
851 def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
852 def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
853 def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
854 def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
855 def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
856 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
857 def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
858 def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
859 def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
860 def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
861 def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
862 def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
864 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
865 def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
867 let OtherPredicates = [D16PreservesUnusedBits] in {
868 def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
869 def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
871 def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
872 def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
873 def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
874 def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
875 def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
876 def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
878 def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
879 def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
880 def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
881 def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
882 def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
883 def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
886 } // End OtherPredicates = [HasFlatAddressSpace]
888 let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
890 def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
891 def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
892 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
893 def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
894 def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
895 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
896 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
897 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
898 def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
899 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
901 foreach vt = Reg32Types.types in {
902 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
903 def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
906 foreach vt = VReg_64.RegTypes in {
907 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
908 def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
911 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
913 foreach vt = VReg_128.RegTypes in {
914 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>;
915 def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>;
918 def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
919 def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
921 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
922 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
923 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
924 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
925 def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
927 let OtherPredicates = [D16PreservesUnusedBits] in {
928 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
929 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
931 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
932 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
933 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
934 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
935 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
936 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
938 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
939 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
940 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
941 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
942 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
943 def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
946 def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
947 def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>;
949 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
950 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
951 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
952 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
953 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
954 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
955 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
956 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
957 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
958 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
959 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
960 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
961 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
962 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>;
964 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
965 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
966 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
967 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
968 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
969 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
970 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
971 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
972 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
973 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
974 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
975 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
976 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
978 def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
979 def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
981 } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
984 //===----------------------------------------------------------------------===//
986 //===----------------------------------------------------------------------===//
988 //===----------------------------------------------------------------------===//
990 //===----------------------------------------------------------------------===//
992 class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
994 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
995 let AssemblerPredicate = isGFX7Only;
996 let DecoderNamespace="GFX7";
999 def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
1000 def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
1001 def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
1002 def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
1003 def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
1004 def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
1005 def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
1006 def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;
1008 def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
1009 def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
1010 def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
1011 def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
1012 def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
1013 def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
1015 multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
1016 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1017 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1020 defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
1021 defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
1022 defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
1023 defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
1024 defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
1025 defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
1026 defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
1027 defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
1028 defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
1029 defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
1030 defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
1031 defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
1032 defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
1033 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
1034 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
1035 defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
1036 defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
1037 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
1038 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
1039 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
1040 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
1041 defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
1042 defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
1043 defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
1044 defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
1045 defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
1047 // CI Only flat instructions
1048 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
1049 defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
1050 defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
1051 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
1052 defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
1053 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
1056 //===----------------------------------------------------------------------===//
1058 //===----------------------------------------------------------------------===//
1060 class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
1062 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
1063 let AssemblerPredicate = isGFX8GFX9;
1064 let DecoderNamespace = "GFX8";
1067 multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
1068 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
1069 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1072 def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
1073 def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
1074 def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
1075 def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
1076 def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
1077 def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
1078 def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
1079 def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
1081 def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
1082 def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
1083 def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
1084 def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
1085 def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
1086 def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
1087 def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
1088 def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
1090 def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
1091 def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
1092 def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
1093 def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
1094 def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
1095 def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1097 multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
1098 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1099 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1102 multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
1103 FLAT_Real_AllAddr_vi<op> {
1104 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1105 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1109 defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
1110 defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
1111 defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
1112 defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
1113 defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
1114 defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
1115 defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
1116 defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
1117 defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
1118 defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
1119 defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
1120 defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
1121 defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
1122 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
1123 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
1124 defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
1125 defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
1126 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
1127 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
1128 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
1129 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
1130 defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
1131 defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
1132 defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
1133 defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
1134 defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
1136 defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
1137 defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
1138 defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
1139 defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
1140 defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
1141 defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
1142 defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
1143 defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
1145 defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>;
1146 defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
1147 defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>;
1148 defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
1149 defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>;
1150 defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
1152 defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
1153 defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
1154 defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
1155 defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1156 defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
1157 defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
1158 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
1159 defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
1162 defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
1163 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
1164 defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>;
1165 defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>;
1166 defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>;
1167 defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>;
1168 defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>;
1169 defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>;
1170 defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>;
1171 defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>;
1172 defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>;
1173 defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>;
1174 defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>;
1175 defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>;
1176 defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
1177 defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>;
1178 defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Real_Atomics_vi <0x63>;
1179 defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>;
1180 defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>;
1181 defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>;
1182 defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>;
1183 defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>;
1184 defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>;
1185 defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>;
1186 defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
1187 defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
1189 defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
1190 defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
1191 defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
1192 defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
1193 defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
1194 defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
1195 defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
1196 defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
1197 defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
1198 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
1199 defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>;
1200 defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
1201 defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>;
1202 defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
1203 defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>;
1204 defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
1205 defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
1206 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1207 defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
1208 defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
1209 defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
1210 defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
1213 //===----------------------------------------------------------------------===//
1215 //===----------------------------------------------------------------------===//
1217 class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
1218 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
1219 let AssemblerPredicate = isGFX10Plus;
1220 let DecoderNamespace = "GFX10";
1222 let Inst{11-0} = offset{11-0};
1223 let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
1224 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
1229 multiclass FLAT_Real_Base_gfx10<bits<7> op> {
1231 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
1234 multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
1236 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1239 multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
1241 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1244 multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
1245 def _SADDR_RTN_gfx10 :
1246 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1250 multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
1251 FLAT_Real_Base_gfx10<op>,
1252 FLAT_Real_SADDR_gfx10<op>;
1254 multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
1255 FLAT_Real_Base_gfx10<op>,
1256 FLAT_Real_RTN_gfx10<op>;
1258 multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
1259 FLAT_Real_AllAddr_gfx10<op>,
1260 FLAT_Real_RTN_gfx10<op>,
1261 FLAT_Real_SADDR_RTN_gfx10<op>;
1263 multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
1264 FLAT_Real_RTN_gfx10<op>,
1265 FLAT_Real_SADDR_RTN_gfx10<op>;
1268 defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
1269 defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>;
1270 defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>;
1271 defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>;
1272 defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>;
1273 defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>;
1274 defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>;
1275 defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>;
1276 defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>;
1277 defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>;
1278 defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>;
1279 defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
1280 defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>;
1281 defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>;
1282 defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>;
1283 defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>;
1284 defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>;
1285 defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>;
1286 defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>;
1287 defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>;
1288 defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>;
1289 defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>;
1290 defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>;
1291 defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>;
1292 defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>;
1293 defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>;
1294 defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>;
1295 defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>;
1296 defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>;
1297 defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>;
1298 defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>;
1299 defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>;
1300 defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>;
1301 defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>;
1302 defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>;
1303 defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>;
1304 defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>;
1305 defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>;
1306 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>;
1307 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>;
1308 defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>;
1309 defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>;
1310 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>;
1311 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>;
1312 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>;
1313 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>;
1314 defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>;
1315 defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>;
1316 defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>;
1317 defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>;
1318 defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>;
1319 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
1320 defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>;
1321 defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>;
1325 defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
1326 defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
1327 defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
1328 defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
1329 defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
1330 defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
1331 defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
1332 defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
1333 defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
1334 defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
1335 defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
1336 defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1337 defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
1338 defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
1339 defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
1340 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
1341 defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
1342 defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
1343 defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
1344 defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
1345 defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
1346 defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
1347 defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
1348 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
1349 defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
1350 defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
1351 defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
1352 defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
1353 defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
1354 defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
1355 defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>;
1356 defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>;
1357 defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>;
1358 defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>;
1359 defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>;
1360 defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>;
1361 defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>;
1362 defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>;
1363 defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>;
1364 defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>;
1365 defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>;
1366 defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>;
1367 defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>;
1368 defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>;
1369 defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>;
1370 defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>;
1371 defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>;
1372 defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>;
1373 defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>;
1374 defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>;
1375 defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>;
1376 defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
1377 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
1378 defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
1379 defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
1380 defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
1381 defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
1383 // ENC_FLAT_SCRATCH.
1384 defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
1385 defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
1386 defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
1387 defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
1388 defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
1389 defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
1390 defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
1391 defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
1392 defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
1393 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
1394 defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
1395 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1396 defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
1397 defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
1398 defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
1399 defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
1400 defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
1401 defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
1402 defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
1403 defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
1404 defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
1405 defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
1407 let SubtargetPredicate = HasAtomicFaddInsts in {
1409 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_AllAddr_vi <0x04d>;
1410 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
1412 } // End SubtargetPredicate = HasAtomicFaddInsts