1 //===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// Immediate operand with asm match class "SMRDOffset8". In this file it is the
// $offset operand of the _IMM_si load encodings (GFX6/GFX7), whose offset
// field is filled from offset{7-0}.
9 def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
10 NamedMatchClass<"SMRDOffset8">> {
11 let OperandType = "OPERAND_IMMEDIATE";
// Immediate operand with asm match class "SMEMOffset". It is the $offset
// operand of the _IMM_vi load/store encodings and of the immediate forms of
// the probe, discard and atomic pseudos in this file.
// Encoding and decoding are delegated to the named custom methods
// (getSMEMOffsetEncoding / decodeSMEMOffset).
14 def smem_offset : NamedOperandU32<"SMEMOffset",
15 NamedMatchClass<"SMEMOffset">> {
16 let OperandType = "OPERAND_IMMEDIATE";
17 let EncoderMethod = "getSMEMOffsetEncoding";
18 let DecoderMethod = "decodeSMEMOffset";
21 //===----------------------------------------------------------------------===//
22 // Scalar Memory classes
23 //===----------------------------------------------------------------------===//
25 class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
26 InstSI <outs, ins, "", pattern>,
27 SIMCInstr<opName, SIEncodingFamily.NONE> {
29 let isCodeGenOnly = 1;
35 let hasSideEffects = 0;
36 let UseNamedOperandTable = 1;
37 let SchedRW = [WriteSMEM];
39 string Mnemonic = opName;
40 string AsmOperands = asmOps;
42 bits<1> has_sbase = 1;
46 bits<1> has_offset = 1;
47 bits<1> offset_is_imm = 0;
51 class SM_Real <SM_Pseudo ps>
52 : InstSI<ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
55 let isCodeGenOnly = 0;
57 Instruction Opcode = !cast<Instruction>(NAME);
59 // copy relevant pseudo op flags
60 let SubtargetPredicate = ps.SubtargetPredicate;
61 let AsmMatchConverter = ps.AsmMatchConverter;
62 let UseNamedOperandTable = ps.UseNamedOperandTable;
65 bit is_buffer = ps.is_buffer;
71 bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
74 class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
75 : SM_Pseudo<opName, (outs), ins, " $sdata, $sbase, $offset"> {
81 let hasSideEffects = 1;
82 let offset_is_imm = isImm;
83 let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
86 class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
87 : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
88 RegisterClass BaseClass;
95 class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
96 : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
97 RegisterClass BaseClass;
98 RegisterClass SrcClass;
106 class SM_Discard_Pseudo <string opName, dag ins, bit isImm>
107 : SM_Pseudo<opName, (outs), ins, " $sbase, $offset"> {
113 let hasSideEffects = 1;
114 let offset_is_imm = isImm;
115 let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
// Defines the two pseudo forms of the scalar load `opName`:
//   _IMM  - offset is an immediate operand ($offset); sets offset_is_imm = 1.
//   _SGPR - offset is held in an SReg_32 operand ($soff).
// Both forms read $sbase from `baseClass`, write $sdst in `dstClass`, and take
// i1 $glc / $dlc modifier operands. PseudoInstr is opName plus the suffix.
// NOTE: the _SGPR asm string refers to $offset although the pseudo's operand
// is named $soff; the per-target real defs replace InOperandList with a list
// that names the operand $offset.
118 multiclass SM_Pseudo_Loads<string opName,
119 RegisterClass baseClass,
120 RegisterClass dstClass> {
121 def _IMM : SM_Load_Pseudo <opName,
122 (outs dstClass:$sdst),
123 (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
124 " $sdst, $sbase, $offset$glc$dlc", []> {
125 let offset_is_imm = 1;
126 let BaseClass = baseClass;
127 let PseudoInstr = opName # "_IMM";
132 def _SGPR : SM_Load_Pseudo <opName,
133 (outs dstClass:$sdst),
134 (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
135 " $sdst, $sbase, $offset$glc$dlc", []> {
136 let BaseClass = baseClass;
137 let PseudoInstr = opName # "_SGPR";
// Defines the two pseudo forms of the scalar store `opName`:
//   _IMM  - offset is an immediate operand ($offset); sets offset_is_imm = 1.
//   _SGPR - offset is held in an SReg_32 operand ($soff).
// Both forms take the stored value $sdata in `srcClass`, the base $sbase in
// `baseClass`, and i1 $glc / $dlc modifier operands; neither has outputs.
// BaseClass and SrcClass are recorded on each def so the real encodings can
// rebuild their operand lists from them.
// NOTE: the _SGPR asm string refers to $offset although the pseudo's operand
// is named $soff.
143 multiclass SM_Pseudo_Stores<string opName,
144 RegisterClass baseClass,
145 RegisterClass srcClass> {
146 def _IMM : SM_Store_Pseudo <opName,
147 (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
148 " $sdata, $sbase, $offset$glc$dlc", []> {
149 let offset_is_imm = 1;
150 let BaseClass = baseClass;
151 let SrcClass = srcClass;
152 let PseudoInstr = opName # "_IMM";
155 def _SGPR : SM_Store_Pseudo <opName,
156 (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
157 " $sdata, $sbase, $offset$glc$dlc", []> {
158 let BaseClass = baseClass;
159 let SrcClass = srcClass;
160 let PseudoInstr = opName # "_SGPR";
// Defines the _IMM (smem_offset immediate, isImm = 1) and _SGPR (SReg_32
// offset, isImm = 0) forms of the discard pseudo `opName`. Both use an
// SReg_64 $sbase.
164 multiclass SM_Pseudo_Discards<string opName> {
165 def _IMM : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, smem_offset:$offset), 1>;
166 def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
169 class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
170 opName, (outs SReg_64_XEXEC:$sdst), (ins),
171 " $sdst", [(set i64:$sdst, (node))]> {
172 let hasSideEffects = 1;
174 // FIXME: This should be definitively mayStore = 0. TableGen
175 // brokenly tries to infer these based on the intrinsic properties
176 // corresponding to the IR attributes. The target intrinsics are
177 // considered as writing to memory for IR dependency purposes, but
178 // those can be modeled with hasSideEffects here. These also end up
179 // inferring differently for llvm.readcyclecounter and the amdgcn
187 class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
188 opName, (outs), (ins), "", [(node)]> {
189 let hasSideEffects = 1;
// Defines the _IMM (smem_offset immediate, isImm = 1) and _SGPR (SReg_32
// offset, isImm = 0) forms of the probe pseudo `opName`. $sdata is an i8
// immediate and $sbase is taken from `baseClass`.
196 multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
197 def _IMM : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, smem_offset:$offset), 1>;
198 def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
201 class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
202 opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
203 " $sdst", [(set i32:$sdst, (node))]> {
204 let hasSideEffects = 1;
211 //===----------------------------------------------------------------------===//
212 // Scalar Atomic Memory Classes
213 //===----------------------------------------------------------------------===//
215 class SM_Atomic_Pseudo <string opName,
216 dag outs, dag ins, string asmOps, bit isRet>
217 : SM_Pseudo<opName, outs, ins, asmOps, []> {
226 // Should these be set?
228 let hasSideEffects = 1;
232 class SM_Pseudo_Atomic<string opName,
233 RegisterClass baseClass,
234 RegisterClass dataClass,
237 SM_Atomic_Pseudo<opName,
238 !if(isRet, (outs dataClass:$sdst), (outs)),
240 (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, DLC:$dlc),
241 (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
242 !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
244 let offset_is_imm = isImm;
245 let PseudoInstr = opName # !if(isImm,
246 !if(isRet, "_IMM_RTN", "_IMM"),
247 !if(isRet, "_SGPR_RTN", "_SGPR"));
249 let Constraints = !if(isRet, "$sdst = $sdata", "");
250 let DisableEncoding = !if(isRet, "$sdata", "");
// Defines the four pseudo forms of the scalar atomic `opName`, one per
// combination of the last two SM_Pseudo_Atomic arguments:
//   _IMM      (1, 0)   _SGPR      (0, 0)
//   _IMM_RTN  (1, 1)   _SGPR_RTN  (0, 1)
// The def suffixes mirror the PseudoInstr names SM_Pseudo_Atomic builds from
// isImm / isRet, so the arguments appear to be (isImm, isRet) in that order.
253 multiclass SM_Pseudo_Atomics<string opName,
254 RegisterClass baseClass,
255 RegisterClass dataClass> {
256 def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
257 def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
258 def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
259 def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
262 //===----------------------------------------------------------------------===//
263 // Scalar Memory Instructions
264 //===----------------------------------------------------------------------===//
266 // We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
267 // SMRD instructions, because the SReg_32_XM0 register class does not include M0
268 // and writing to M0 from an SMRD instruction will hang the GPU.
270 // XXX - SMEM instructions do not allow exec for the data operand, but
271 // is exec allowed as sdst for SMRD on SI/CI?
// Scalar load pseudos with a 64-bit SGPR base (SReg_64). The destination
// register class widens with the load: SReg_32_XM0_XEXEC for one dword,
// SReg_64_XEXEC for x2, then SReg_128 / SReg_256 / SReg_512.
272 defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
273 defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
274 defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
275 defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
276 defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
278 let is_buffer = 1 in {
279 defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
280 "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
283 // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
284 // SI/CI, but disallowed for SMEM on VI.
285 defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
286 "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
289 defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
290 "s_buffer_load_dwordx4", SReg_128, SReg_128
293 defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
294 "s_buffer_load_dwordx8", SReg_128, SReg_256
297 defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
298 "s_buffer_load_dwordx16", SReg_128, SReg_512
302 let SubtargetPredicate = HasScalarStores in {
303 defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
304 defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
305 defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
307 let is_buffer = 1 in {
308 defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
309 "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
312 defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
313 "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
316 defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
317 "s_buffer_store_dwordx4", SReg_128, SReg_128
320 } // End SubtargetPredicate = HasScalarStores
322 let SubtargetPredicate = HasSMemTimeInst in
323 def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
324 def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
326 let SubtargetPredicate = isGFX7GFX8GFX9 in {
327 def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
328 } // let SubtargetPredicate = isGFX7GFX8GFX9
330 let SubtargetPredicate = isGFX8Plus in {
331 let OtherPredicates = [HasScalarStores] in {
332 def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
333 def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
334 } // End OtherPredicates = [HasScalarStores]
335 def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
337 defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
338 let is_buffer = 1 in {
339 defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
341 } // SubtargetPredicate = isGFX8Plus
343 let SubtargetPredicate = isGFX10Plus in
344 def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
345 let SubtargetPredicate = HasGetWaveIdInst in
346 def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
349 let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
350 defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
351 defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
352 defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
354 defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
355 defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
356 defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
357 } // SubtargetPredicate = HasScalarFlatScratchInsts
359 let SubtargetPredicate = HasScalarAtomics in {
361 let is_buffer = 1 in {
362 defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
363 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
364 defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
365 defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <"s_buffer_atomic_sub", SReg_128, SReg_32_XM0_XEXEC>;
366 defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_buffer_atomic_smin", SReg_128, SReg_32_XM0_XEXEC>;
367 defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_buffer_atomic_umin", SReg_128, SReg_32_XM0_XEXEC>;
368 defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_buffer_atomic_smax", SReg_128, SReg_32_XM0_XEXEC>;
369 defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_buffer_atomic_umax", SReg_128, SReg_32_XM0_XEXEC>;
370 defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <"s_buffer_atomic_and", SReg_128, SReg_32_XM0_XEXEC>;
371 defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <"s_buffer_atomic_or", SReg_128, SReg_32_XM0_XEXEC>;
372 defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <"s_buffer_atomic_xor", SReg_128, SReg_32_XM0_XEXEC>;
373 defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <"s_buffer_atomic_inc", SReg_128, SReg_32_XM0_XEXEC>;
374 defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <"s_buffer_atomic_dec", SReg_128, SReg_32_XM0_XEXEC>;
376 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_swap_x2", SReg_128, SReg_64_XEXEC>;
377 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap_x2", SReg_128, SReg_128>;
378 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_add_x2", SReg_128, SReg_64_XEXEC>;
379 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_sub_x2", SReg_128, SReg_64_XEXEC>;
380 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smin_x2", SReg_128, SReg_64_XEXEC>;
381 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umin_x2", SReg_128, SReg_64_XEXEC>;
382 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smax_x2", SReg_128, SReg_64_XEXEC>;
383 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umax_x2", SReg_128, SReg_64_XEXEC>;
384 defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_and_x2", SReg_128, SReg_64_XEXEC>;
385 defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_or_x2", SReg_128, SReg_64_XEXEC>;
386 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
387 defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
388 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
391 defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
392 defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
393 defm S_ATOMIC_ADD : SM_Pseudo_Atomics <"s_atomic_add", SReg_64, SReg_32_XM0_XEXEC>;
394 defm S_ATOMIC_SUB : SM_Pseudo_Atomics <"s_atomic_sub", SReg_64, SReg_32_XM0_XEXEC>;
395 defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_atomic_smin", SReg_64, SReg_32_XM0_XEXEC>;
396 defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_atomic_umin", SReg_64, SReg_32_XM0_XEXEC>;
397 defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_atomic_smax", SReg_64, SReg_32_XM0_XEXEC>;
398 defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_atomic_umax", SReg_64, SReg_32_XM0_XEXEC>;
399 defm S_ATOMIC_AND : SM_Pseudo_Atomics <"s_atomic_and", SReg_64, SReg_32_XM0_XEXEC>;
400 defm S_ATOMIC_OR : SM_Pseudo_Atomics <"s_atomic_or", SReg_64, SReg_32_XM0_XEXEC>;
401 defm S_ATOMIC_XOR : SM_Pseudo_Atomics <"s_atomic_xor", SReg_64, SReg_32_XM0_XEXEC>;
402 defm S_ATOMIC_INC : SM_Pseudo_Atomics <"s_atomic_inc", SReg_64, SReg_32_XM0_XEXEC>;
403 defm S_ATOMIC_DEC : SM_Pseudo_Atomics <"s_atomic_dec", SReg_64, SReg_32_XM0_XEXEC>;
405 defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_atomic_swap_x2", SReg_64, SReg_64_XEXEC>;
406 defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_atomic_cmpswap_x2", SReg_64, SReg_128>;
407 defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_atomic_add_x2", SReg_64, SReg_64_XEXEC>;
408 defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_atomic_sub_x2", SReg_64, SReg_64_XEXEC>;
409 defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_atomic_smin_x2", SReg_64, SReg_64_XEXEC>;
410 defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_atomic_umin_x2", SReg_64, SReg_64_XEXEC>;
411 defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_atomic_smax_x2", SReg_64, SReg_64_XEXEC>;
412 defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_atomic_umax_x2", SReg_64, SReg_64_XEXEC>;
413 defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_atomic_and_x2", SReg_64, SReg_64_XEXEC>;
414 defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_atomic_or_x2", SReg_64, SReg_64_XEXEC>;
415 defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_atomic_xor_x2", SReg_64, SReg_64_XEXEC>;
416 defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_atomic_inc_x2", SReg_64, SReg_64_XEXEC>;
417 defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_64, SReg_64_XEXEC>;
419 } // let SubtargetPredicate = HasScalarAtomics
421 let SubtargetPredicate = HasScalarAtomics in {
422 defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
423 defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
426 //===----------------------------------------------------------------------===//
428 //===----------------------------------------------------------------------===//
430 //===----------------------------------------------------------------------===//
432 //===----------------------------------------------------------------------===//
434 class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
436 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
439 let AssemblerPredicate = isGFX6GFX7;
440 let DecoderNamespace = "GFX6GFX7";
442 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
444 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
445 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
446 let Inst{26-22} = op;
447 let Inst{31-27} = 0x18; //encoding
450 // FIXME: Assembler should reject trying to use glc on SMRD
451 // instructions on SI.
452 multiclass SM_Real_Loads_si<bits<5> op, string ps,
453 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
454 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
456 def _IMM_si : SMRD_Real_si <op, immPs> {
457 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
460 // FIXME: The operand name $offset is inconsistent with $soff used
462 def _SGPR_si : SMRD_Real_si <op, sgprPs> {
463 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// SI-family real encodings. Each defm binds a load pseudo family (looked up by
// name, producing _IMM_si and _SGPR_si defs) to its 5-bit SMRD opcode:
// 0x00-0x04 for S_LOAD_*, 0x08-0x0c for S_BUFFER_LOAD_*.
468 defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
469 defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
470 defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
471 defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
472 defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
473 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
474 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
475 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
476 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
477 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;
// Operand-less / sdst-only pseudos use the base SMRD_Real_si class directly.
479 def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
480 def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
483 //===----------------------------------------------------------------------===//
485 //===----------------------------------------------------------------------===//
487 class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
489 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
493 let AssemblerPredicate = isGFX8GFX9;
494 let DecoderNamespace = "GFX8";
496 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
497 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
499 let Inst{16} = !if(ps.has_glc, glc, ?);
501 let Inst{25-18} = op;
502 let Inst{31-26} = 0x30; //encoding
504 // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
505 // Offset value is corrected accordingly when offset is encoded/decoded.
506 let Inst{52-32} = !if(ps.has_offset, offset{20-0}, ?);
509 multiclass SM_Real_Loads_vi<bits<8> op, string ps,
510 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
511 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
512 def _IMM_vi : SMEM_Real_vi <op, immPs> {
513 let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
515 def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
516 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
520 class SMEM_Real_Store_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
525 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
528 multiclass SM_Real_Stores_vi<bits<8> op, string ps,
529 SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
530 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
531 // FIXME: The operand name $offset is inconsistent with $soff used
533 def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
534 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
537 def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
538 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
542 multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
543 def _IMM_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
544 def _SGPR_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
// VI-family real encodings for loads and stores. Each defm binds a pseudo
// family (looked up by name, producing _IMM_vi and _SGPR_vi defs) to its 8-bit
// SMEM opcode.
// Loads: 0x00-0x04 for S_LOAD_*, 0x08-0x0c for S_BUFFER_LOAD_*.
547 defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
548 defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
549 defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
550 defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
551 defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
552 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
553 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
554 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
555 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
556 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
// Stores: 0x10-0x12 for S_STORE_*, 0x18-0x1a for S_BUFFER_STORE_*.
558 defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
559 defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
560 defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;
562 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
563 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
564 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;
566 // These instructions use the same encoding
// VI-family encodings of the cache-control and timer pseudos (opcodes
// 0x20-0x25), defined directly on SMEM_Real_vi.
567 def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
568 def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>;
569 def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
570 def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
571 def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>;
572 def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>;
// Scalar scratch loads (0x05-0x07) and stores (0x15-0x17).
574 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_vi <0x05, "S_SCRATCH_LOAD_DWORD">;
575 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06, "S_SCRATCH_LOAD_DWORDX2">;
576 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07, "S_SCRATCH_LOAD_DWORDX4">;
578 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_vi <0x15, "S_SCRATCH_STORE_DWORD">;
579 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16, "S_SCRATCH_STORE_DWORDX2">;
580 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17, "S_SCRATCH_STORE_DWORDX4">;
// ATC probes (0x26, 0x27); SM_Real_Probe_vi builds these on the store
// encoding class (SMEM_Real_Store_vi).
582 defm S_ATC_PROBE : SM_Real_Probe_vi <0x26, "S_ATC_PROBE">;
583 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27, "S_ATC_PROBE_BUFFER">;
585 //===----------------------------------------------------------------------===//
587 //===----------------------------------------------------------------------===//
589 class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
590 : SMEM_Real_vi <op, ps> {
594 let Constraints = ps.Constraints;
595 let DisableEncoding = ps.DisableEncoding;
598 let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
// Emits the four VI-family real encodings of the atomic pseudo family `ps`
// (_IMM, _SGPR, _IMM_RTN, _SGPR_RTN). All four share the same opcode `op`;
// each pseudo is looked up by name and cast to SM_Atomic_Pseudo.
601 multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
602 def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
603 def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
604 def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
605 def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// VI-family opcode assignments for the scalar atomics. Within each group the
// operations appear in the same order (swap, cmpswap, add, sub, smin, umin,
// smax, umax, and, or, xor, inc, dec) with consecutive opcodes.
// Buffer atomics: 0x40-0x4c.
608 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
609 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
610 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_vi <0x42, "S_BUFFER_ATOMIC_ADD">;
611 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_vi <0x43, "S_BUFFER_ATOMIC_SUB">;
612 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_vi <0x44, "S_BUFFER_ATOMIC_SMIN">;
613 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_vi <0x45, "S_BUFFER_ATOMIC_UMIN">;
614 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_vi <0x46, "S_BUFFER_ATOMIC_SMAX">;
615 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_vi <0x47, "S_BUFFER_ATOMIC_UMAX">;
616 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_vi <0x48, "S_BUFFER_ATOMIC_AND">;
617 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_vi <0x49, "S_BUFFER_ATOMIC_OR">;
618 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_vi <0x4a, "S_BUFFER_ATOMIC_XOR">;
619 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_vi <0x4b, "S_BUFFER_ATOMIC_INC">;
620 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_vi <0x4c, "S_BUFFER_ATOMIC_DEC">;
// Buffer atomics, _X2 forms: 0x60-0x6c.
622 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
623 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
624 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
625 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
626 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
627 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
628 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
629 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
630 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0x68, "S_BUFFER_ATOMIC_AND_X2">;
631 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0x69, "S_BUFFER_ATOMIC_OR_X2">;
632 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
633 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
634 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
// Non-buffer atomics: 0x80-0x8c.
636 defm S_ATOMIC_SWAP : SM_Real_Atomics_vi <0x80, "S_ATOMIC_SWAP">;
637 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81, "S_ATOMIC_CMPSWAP">;
638 defm S_ATOMIC_ADD : SM_Real_Atomics_vi <0x82, "S_ATOMIC_ADD">;
639 defm S_ATOMIC_SUB : SM_Real_Atomics_vi <0x83, "S_ATOMIC_SUB">;
640 defm S_ATOMIC_SMIN : SM_Real_Atomics_vi <0x84, "S_ATOMIC_SMIN">;
641 defm S_ATOMIC_UMIN : SM_Real_Atomics_vi <0x85, "S_ATOMIC_UMIN">;
642 defm S_ATOMIC_SMAX : SM_Real_Atomics_vi <0x86, "S_ATOMIC_SMAX">;
643 defm S_ATOMIC_UMAX : SM_Real_Atomics_vi <0x87, "S_ATOMIC_UMAX">;
644 defm S_ATOMIC_AND : SM_Real_Atomics_vi <0x88, "S_ATOMIC_AND">;
645 defm S_ATOMIC_OR : SM_Real_Atomics_vi <0x89, "S_ATOMIC_OR">;
646 defm S_ATOMIC_XOR : SM_Real_Atomics_vi <0x8a, "S_ATOMIC_XOR">;
647 defm S_ATOMIC_INC : SM_Real_Atomics_vi <0x8b, "S_ATOMIC_INC">;
648 defm S_ATOMIC_DEC : SM_Real_Atomics_vi <0x8c, "S_ATOMIC_DEC">;
// Non-buffer atomics, _X2 forms: 0xa0-0xac.
650 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0xa0, "S_ATOMIC_SWAP_X2">;
651 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1, "S_ATOMIC_CMPSWAP_X2">;
652 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0xa2, "S_ATOMIC_ADD_X2">;
653 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0xa3, "S_ATOMIC_SUB_X2">;
654 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0xa4, "S_ATOMIC_SMIN_X2">;
655 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0xa5, "S_ATOMIC_UMIN_X2">;
656 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0xa6, "S_ATOMIC_SMAX_X2">;
657 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0xa7, "S_ATOMIC_UMAX_X2">;
658 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0xa8, "S_ATOMIC_AND_X2">;
659 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0xa9, "S_ATOMIC_OR_X2">;
660 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0xaa, "S_ATOMIC_XOR_X2">;
661 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0xab, "S_ATOMIC_INC_X2">;
662 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">;
// Emits the _IMM_vi and _SGPR_vi real encodings of the discard pseudo family
// `ps`, both with opcode `op`, directly on SMEM_Real_vi.
664 multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
665 def _IMM_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
666 def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
669 defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
670 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29, "S_DCACHE_DISCARD_X2">;
672 //===----------------------------------------------------------------------===//
674 //===----------------------------------------------------------------------===//
// Immediate operand with asm match class "SMRDLiteralOffset". It is the
// $offset operand of the GFX7-only _IMM_ci load encodings, which place the
// full offset{31-0} in Inst{63-32}.
676 def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
677 NamedMatchClass<"SMRDLiteralOffset">> {
678 let OperandType = "OPERAND_IMMEDIATE";
681 class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
685 let AssemblerPredicate = isGFX7Only;
686 let DecoderNamespace = "GFX7";
687 let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
689 let LGKM_CNT = ps.LGKM_CNT;
690 let mayLoad = ps.mayLoad;
691 let mayStore = ps.mayStore;
692 let hasSideEffects = ps.hasSideEffects;
693 let SchedRW = ps.SchedRW;
695 let Inst{7-0} = 0xff;
697 let Inst{14-9} = sbase{6-1};
698 let Inst{21-15} = sdst{6-0};
699 let Inst{26-22} = op;
700 let Inst{31-27} = 0x18; //encoding
701 let Inst{63-32} = offset{31-0};
// GFX7-only literal-offset encodings of the immediate-form load pseudos.
// Opcodes match the SI-family ones: 0x00-0x04 for S_LOAD_*, 0x08-0x0c for
// S_BUFFER_LOAD_*.
704 def S_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
705 def S_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
706 def S_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
707 def S_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
708 def S_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
709 def S_BUFFER_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
710 def S_BUFFER_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
711 def S_BUFFER_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
712 def S_BUFFER_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
713 def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
715 class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
717 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
720 let AssemblerPredicate = isGFX7Only;
721 let DecoderNamespace = "GFX7";
723 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
725 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
726 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
727 let Inst{26-22} = op;
728 let Inst{31-27} = 0x18; //encoding
731 def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
733 //===----------------------------------------------------------------------===//
734 // Scalar Memory Patterns
735 //===----------------------------------------------------------------------===//
737 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> {
738 let GISelPredicateCode = [{
739 if (!MI.hasOneMemOperand())
741 if (!isInstrUniform(MI))
744 // FIXME: We should probably be caching this.
745 SmallVector<GEPInfo, 4> AddrInfo;
746 getAddrModeInfo(MI, MRI, AddrInfo);
748 if (hasVgprParts(AddrInfo))
// Addressing-mode complex patterns used by the SMRD load patterns in this
// file. Each names the selector function that implements the match.
// The i64 forms match an address and yield two operands ($sbase, $offset);
// the i32 buffer forms match only the offset and yield one operand.
754 def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
755 def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
756 def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
757 def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
758 def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
// Selection patterns that map an smrd_load of value type `vt` onto the
// instruction family named by `Instr`:
//   - SMRDImm address        -> Instr#"_IMM"
//   - SMRDImm32 address      -> Instr#"_IMM_ci" (only under isGFX7Only)
//   - SMRDSgpr address       -> Instr#"_SGPR"
//   - bare SReg_64 base      -> Instr#"_IMM" with a zero offset
// The trailing two operands of every selected instruction are passed as 0.
760 multiclass SMRD_Pattern <string Instr, ValueType vt> {
764 (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
765 (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
768 // 2. 32-bit IMM offset on CI
770 (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
771 (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
772 let OtherPredicates = [isGFX7Only];
777 (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
778 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
783 (vt (smrd_load (i64 SReg_64:$sbase))),
784 (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
// Selection patterns that map SIsbuffer_load of value type `vt` onto the
// instruction family named by `Instr`. The base is a v4i32 resource held in
// SReg_128. The $cachepolicy immediate is split into the instruction's last
// two operands via extract_glc and extract_dlc.
// The two immediate-offset patterns set AddedComplexity to 2 and 1.
788 multiclass SMLoad_Pattern <string Instr, ValueType vt> {
789 // 1. Offset as an immediate
791 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
792 (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
793 (extract_dlc $cachepolicy)))> {
794 let AddedComplexity = 2;
797 // 2. 32-bit IMM offset on CI
799 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
800 (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
801 (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
802 let OtherPredicates = [isGFX7Only];
803 let AddedComplexity = 1;
806 // 3. Offset loaded in a 32-bit SGPR
808 (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
809 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
810 (extract_dlc $cachepolicy)))
814 // Global and constant loads can be selected to either MUBUF or SMRD
815 // instructions, but SMRD instructions are faster so we want the instruction
816 // selector to prefer those.
817 let AddedComplexity = 100 in {
// Pointer-based loads: instantiate SMRD_Pattern once per value type listed for
// each register class, pairing the class with the matching S_LOAD_DWORD* name.
819 foreach vt = Reg32Types.types in {
820 defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
823 foreach vt = SReg_64.RegTypes in {
824 defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
827 foreach vt = SReg_128.RegTypes in {
828 defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
831 foreach vt = SReg_256.RegTypes in {
832 defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
835 foreach vt = SReg_512.RegTypes in {
836 defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
// Buffer loads: integer result types (i32 through v16i32).
839 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
840 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
841 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
842 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
843 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
// Buffer loads: floating-point result types (f32 through v16f32).
845 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
846 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
847 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
848 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
849 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
850 } // End let AddedComplexity = 100
// Selection of the i64 `readcyclecounter` node when HasSMemTimeInst holds.
852 let OtherPredicates = [HasSMemTimeInst] in {
854 (i64 (readcyclecounter)),
857 } // let OtherPredicates = [HasSMemTimeInst]
// Selection of `readcyclecounter` when HasNoSMemTimeInst holds: the 64-bit
// result is assembled with REG_SEQUENCE from an S_GETREG_B32 read of
// HWREG.SHADER_CYCLES in sub0 and a constant zero (S_MOV_B32 0) in sub1.
859 let OtherPredicates = [HasNoSMemTimeInst] in {
861 (i64 (readcyclecounter)),
862 (REG_SEQUENCE SReg_64,
863 (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
864 (S_MOV_B32 (i32 0)), sub1)
866 } // let OtherPredicates = [HasNoSMemTimeInst]
868 //===----------------------------------------------------------------------===//
870 //===----------------------------------------------------------------------===//
// Real-instruction class for the GFX10 SMEM encoding (Enc64), assembled under
// isGFX10Plus and decoded in the "GFX10" namespace.
//
// Encoding fields set here:
//   Inst{5-0}   sbase{6-1}, when the pseudo has an sbase
//   Inst{12-6}  sdst{6-0},  when the pseudo has an sdst
//   Inst{14}    dlc,        when the pseudo has dlc
//   Inst{16}    glc,        when the pseudo has glc
//   Inst{25-18} op (8-bit opcode parameter)
//   Inst{31-26} constant 0x3d
//   Inst{52-32} offset{20-0}, only for immediate-offset pseudos with an offset
//   Inst{63-57} SGPR_NULL's hardware encoding for immediate-offset pseudos;
//               otherwise offset{6-0} when the pseudo has an offset
// A field whose source is absent on the pseudo is left unset (`?`).
872 class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
873 SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
877 let AssemblerPredicate = isGFX10Plus;
878 let DecoderNamespace = "GFX10";
880 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
881 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
882 let Inst{14} = !if(ps.has_dlc, dlc, ?);
883 let Inst{16} = !if(ps.has_glc, glc, ?);
884 let Inst{25-18} = op;
885 let Inst{31-26} = 0x3d;
886 let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
887 let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
888 !if(ps.has_offset, offset{6-0}, ?));
// Defines the `_IMM_gfx10` and `_SGPR_gfx10` real instructions for a load.
//   op     - 8-bit opcode shared by both variants
//   ps     - base pseudo name; the default immPs/sgprPs are looked up as
//            ps#_IMM and ps#_SGPR
// Each variant overrides InOperandList so that $offset is an smem_offset
// immediate or an SReg_32 register, followed by the GLC and DLC operands.
891 multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
892 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
893 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
894 def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
895 let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
897 def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
898 let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// GFX10 real-instruction class for stores: identical to SMEM_Real_gfx10 except
// that Inst{12-6} is filled from sdata{6-0} (still conditional on the pseudo's
// has_sdst flag) instead of from sdst.
902 class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
906 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
// Defines the `_IMM_gfx10` and `_SGPR_gfx10` real instructions for a store.
//   op     - 8-bit opcode shared by both variants
//   ps     - base pseudo name; the default immPs/sgprPs are looked up as
//            ps#_IMM and ps#_SGPR
// Each variant overrides InOperandList to ($sdata, $sbase, $offset, $glc,
// $dlc), with $offset an smem_offset immediate or an SReg_32 register.
909 multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
910 SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
911 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
912 // FIXME: The operand name $offset is inconsistent with $soff used
914 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
915 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
918 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
919 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
// GFX10 real encodings for scalar loads, opcodes 0x000-0x004.
923 defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
924 defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
925 defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
926 defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
927 defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
// Scalar scratch loads, opcodes 0x005-0x007; gated on
// HasScalarFlatScratchInsts.
929 let SubtargetPredicate = HasScalarFlatScratchInsts in {
930 defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
931 defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
932 defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
933 } // End SubtargetPredicate = HasScalarFlatScratchInsts
// Scalar buffer loads, opcodes 0x008-0x00c.
935 defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
936 defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
937 defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
938 defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
939 defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
// GFX10 real encodings for scalar stores; all are gated on HasScalarStores.
// Opcodes: 0x010-0x012 plain, 0x015-0x017 scratch, 0x018-0x01a buffer.
941 let SubtargetPredicate = HasScalarStores in {
942 defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
943 defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
944 defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
// Scratch stores additionally list HasScalarFlatScratchInsts in
// OtherPredicates, on top of the enclosing SubtargetPredicate.
945 let OtherPredicates = [HasScalarFlatScratchInsts] in {
946 defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
947 defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
948 defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
949 } // End OtherPredicates = [HasScalarFlatScratchInsts]
950 defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
951 defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
952 defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
953 } // End SubtargetPredicate = HasScalarStores
// GFX10 real encodings for single-variant SMEM pseudos (no _IMM/_SGPR split).
// Note the entries are not listed in opcode order.
955 def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
956 def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>;
957 def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
958 def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
959 def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
// S_DCACHE_WB is only defined for subtargets with HasScalarStores.
961 let SubtargetPredicate = HasScalarStores in {
962 def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
963 } // End SubtargetPredicate = HasScalarStores
// Defines the `_IMM_gfx10` and `_SGPR_gfx10` real instructions for a probe
// pseudo pair (ps#_IMM / ps#_SGPR). Probes use the store encoding class
// SMEM_Real_Store_gfx10 rather than the plain SMEM_Real_gfx10.
965 multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
966 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
967 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
// Probe instructions, opcodes 0x26 and 0x27.
970 defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
971 defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
// GFX10 real-instruction class for scalar atomics. Constraints and
// DisableEncoding are copied from the atomic pseudo.
//
// Encoding overrides relative to SMEM_Real_gfx10:
//   Inst{14}   dlc when the pseudo has dlc, otherwise 0 (not left unset)
//   Inst{12-6} sdst{6-0} when glc is set, otherwise sdata{6-0}
973 class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
974 : SMEM_Real_gfx10 <op, ps> {
979 let Constraints = ps.Constraints;
980 let DisableEncoding = ps.DisableEncoding;
984 let Inst{14} = !if(ps.has_dlc, dlc, 0);
985 let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
// Defines four GFX10 real instructions for one atomic operation, all sharing
// opcode `op`: the _IMM and _SGPR offset forms, each with and without the
// _RTN suffix. The pseudos are looked up by name as ps#_IMM, ps#_SGPR,
// ps#_IMM_RTN and ps#_SGPR_RTN.
988 multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
989 def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
990 def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
991 def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
992 def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
// Everything from here to the matching "End SubtargetPredicate" marker is
// gated on HasScalarAtomics.
995 let SubtargetPredicate = HasScalarAtomics in {
// Buffer atomics, opcodes 0x40-0x4c.
997 defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
998 defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
999 defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
1000 defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
1001 defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
1002 defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
1003 defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
1004 defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
1005 defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
1006 defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
1007 defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
1008 defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
1009 defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
// Buffer atomics, _X2 variants, opcodes 0x60-0x6c (same order, offset by 0x20).
1011 defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
1012 defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
1013 defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
1014 defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
1015 defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
1016 defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
1017 defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
1018 defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
1019 defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
1020 defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
1021 defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
1022 defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
1023 defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
// Non-buffer atomics, opcodes 0x80-0x8c.
1025 defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
1026 defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
1027 defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
1028 defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
1029 defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
1030 defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
1031 defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
1032 defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
1033 defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
1034 defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
1035 defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
1036 defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
1037 defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
// Non-buffer atomics, _X2 variants, opcodes 0xa0-0xac (same order, offset by
// 0x20).
1039 defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
1040 defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
1041 defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
1042 defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
1043 defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
1044 defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
1045 defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
1046 defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
1047 defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
1048 defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
1049 defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
1050 defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
1051 defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
// Defines the `_IMM_gfx10` and `_SGPR_gfx10` real instructions for a discard
// pseudo pair (ps#_IMM / ps#_SGPR), using the plain SMEM_Real_gfx10 encoding.
1053 multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
1054 def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1055 def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
// Data-cache discard instructions, opcodes 0x28 and 0x29. They sit inside the
// HasScalarAtomics region that closes on the line below them.
1058 defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
1059 defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
1061 } // End SubtargetPredicate = HasScalarAtomics
// Generated lookup table over every record deriving from SM_Real. Each row
// exposes the record's Opcode and is_buffer fields under the C++ type name
// SMInfo. Rows are keyed by Opcode, and the generated lookup function is named
// getSMEMOpcodeHelper.
1063 def SMInfoTable : GenericTable {
1064 let FilterClass = "SM_Real";
1065 let CppTypeName = "SMInfo";
1066 let Fields = ["Opcode", "is_buffer"];
1068 let PrimaryKey = ["Opcode"];
1069 let PrimaryKeyName = "getSMEMOpcodeHelper";