1 //===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
// VOP2e: Enc32-based bit layout for a VOP2 instruction described by profile P.
// Operand fields that the profile does not provide are encoded as zero.
// NOTE(review): `op` is not referenced in the lines visible here; its bit
// range is presumably assigned on a line outside this view -- confirm.
13 class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
18 let Inst{8-0} = !if(P.HasSrc0, src0, 0); // bits 8-0: src0 when P.HasSrc0
19 let Inst{16-9} = !if(P.HasSrc1, src1, 0); // bits 16-9: src1 when P.HasSrc1
20 let Inst{24-17} = !if(P.EmitDst, vdst, 0); // bits 24-17: vdst when P.EmitDst
22 let Inst{31} = 0x0; // encoding: bit 31 is fixed to 0
// VOP2_MADKe: Enc64-based layout used by the VOP2 forms that carry an extra
// immediate. The low word uses the same src0/src1/vdst fields as VOP2e and
// the upper 32 bits hold `imm`.
// NOTE(review): `op` is not referenced in the lines visible here -- confirm
// where it is placed.
25 class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
31 let Inst{8-0} = !if(P.HasSrc0, src0, 0); // bits 8-0: src0 when P.HasSrc0
32 let Inst{16-9} = !if(P.HasSrc1, src1, 0); // bits 16-9: src1 when P.HasSrc1
33 let Inst{24-17} = !if(P.EmitDst, vdst, 0); // bits 24-17: vdst when P.EmitDst
35 let Inst{31} = 0x0; // encoding
36 let Inst{63-32} = imm; // bits 63-32: the immediate operand
// VOP2_SDWAe: VOP2 fields layered on top of VOP_SDWAe<P>. Bits 8-0 hold the
// constant 0xf9 (annotated "sdwa") instead of a src0 operand, and only the
// low 8 bits of src1 and vdst are placed in this word.
// NOTE(review): `op` is not referenced in the lines visible here -- confirm.
39 class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
43 let Inst{8-0} = 0xf9; // sdwa
44 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); // low 8 bits of src1, or 0
45 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); // low 8 bits of vdst, or 0
47 let Inst{31} = 0x0; // encoding
// VOP2_SDWA9Ae: VOP2 fields layered on top of VOP_SDWA9Ae<P>. Same low-word
// layout as VOP2_SDWAe, plus bit 63, which receives bit 8 of src1
// (annotated "src1_sgpr").
// NOTE(review): `op` is not referenced in the lines visible here -- confirm.
50 class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
54 let Inst{8-0} = 0xf9; // sdwa
55 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); // low 8 bits of src1, or 0
56 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); // low 8 bits of vdst, or 0
58 let Inst{31} = 0x0; // encoding
59 let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
// VOP2_Pseudo: pseudo instruction for the 32-bit form of a VOP2 operation.
//   opName  - mnemonic passed through to VOP_Pseudo.
//   P       - operand profile; its Outs32/Ins32/Asm32 members are used.
//   pattern - optional selection patterns (default: none).
//   suffix  - name suffix passed to VOP_Pseudo (default "_e32").
62 class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
63 VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {
65 let AsmOperands = P.Asm32;
70 let hasSideEffects = 0;
// The mode register is considered read when either the destination or the
// src0 value type is a floating-point type.
72 let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
// FP exceptions are assumed possible exactly when the mode register is read.
74 let mayRaiseFPException = ReadsModeReg;
// EXEC is always an implicit use; MODE is added only when it is read.
78 let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
80 let AsmVariantName = AMDGPUAsmVariants.Default;
// VOP2_Real: real counterpart of the pseudo instruction `ps` for the given
// EncodingFamily.
//   ps             - the VOP2_Pseudo whose operand lists, asm operands and
//                    flags are copied below.
//   EncodingFamily - forwarded to SIMCInstr together with ps.PseudoInstr.
//   real_name      - mnemonic used in the asm string (default: ps.Mnemonic).
83 class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
85 InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
86 SIMCInstr <ps.PseudoInstr, EncodingFamily> {
91 let isCodeGenOnly = 0;
96 // copy relevant pseudo op flags
97 let SubtargetPredicate = ps.SubtargetPredicate;
98 let OtherPredicates = ps.OtherPredicates;
99 let AsmMatchConverter = ps.AsmMatchConverter;
100 let AsmVariantName = ps.AsmVariantName;
// Constraints and DisableEncoding are copied exactly once, here, together
// with the other flags inherited from the pseudo.
101 let Constraints = ps.Constraints;
102 let DisableEncoding = ps.DisableEncoding;
103 let TSFlags = ps.TSFlags;
104 let UseNamedOperandTable = ps.UseNamedOperandTable;
107 let SchedRW = ps.SchedRW;
108 let mayLoad = ps.mayLoad;
109 let mayStore = ps.mayStore;
// VOP2_SDWA_Pseudo: SDWA pseudo for a VOP2 operation. It forwards all
// arguments to VOP_SDWA_Pseudo and selects "cvtSdwaVOP2" as the
// AsmMatchConverter.
112 class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
113 VOP_SDWA_Pseudo <OpName, P, pattern> {
114 let AsmMatchConverter = "cvtSdwaVOP2";
117 class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
118 VOP_DPP_Pseudo <OpName, P, pattern> {
122 class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
123 list<dag> ret = !if(P.HasModifiers,
127 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
128 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
129 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
130 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
133 multiclass VOP2Inst_e32<string opName,
135 SDPatternOperator node = null_frag,
136 string revOp = opName,
137 bit GFX9Renamed = 0> {
138 let renamedInGFX9 = GFX9Renamed in {
139 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
140 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
141 } // End renamedInGFX9 = GFX9Renamed
144 VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
145 string VOPDName, SDPatternOperator node = null_frag,
146 string revOp = opName, bit GFX9Renamed = 0> {
147 defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
148 VOPD_Component<VOPDOp, VOPDName>;
150 multiclass VOP2Inst_e64<string opName,
152 SDPatternOperator node = null_frag,
153 string revOp = opName,
154 bit GFX9Renamed = 0> {
155 let renamedInGFX9 = GFX9Renamed in {
156 def _e64 : VOP3InstBase <opName, P, node, 1>,
157 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
159 let SubtargetPredicate = isGFX11Plus in {
160 if P.HasExtVOP3DPP then
161 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
162 } // End SubtargetPredicate = isGFX11Plus
163 } // End renamedInGFX9 = GFX9Renamed
166 multiclass VOP2Inst_sdwa<string opName,
168 bit GFX9Renamed = 0> {
169 let renamedInGFX9 = GFX9Renamed in {
171 def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
172 } // End renamedInGFX9 = GFX9Renamed
175 multiclass VOP2Inst<string opName,
177 SDPatternOperator node = null_frag,
178 string revOp = opName,
179 bit GFX9Renamed = 0> :
180 VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
181 VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
182 VOP2Inst_sdwa<opName, P, GFX9Renamed> {
183 let renamedInGFX9 = GFX9Renamed in {
185 def _dpp : VOP2_DPP_Pseudo <opName, P>;
189 multiclass VOP2Inst_t16<string opName,
191 SDPatternOperator node = null_frag,
192 string revOp = opName,
193 bit GFX9Renamed = 0> {
194 let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
195 defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
197 let SubtargetPredicate = HasTrue16BitInsts in {
198 defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
202 // Creating a _t16_e32 pseudo when there is no corresponding real instruction on
203 // any subtarget is a problem. It makes getMCOpcodeGen return -1, which we
204 // assume means the instruction is already a real. The fix is to not create that
// _t16_e32 pseudo.
206 multiclass VOP2Inst_e64_t16<string opName,
208 SDPatternOperator node = null_frag,
209 string revOp = opName,
210 bit GFX9Renamed = 0> {
211 let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
212 defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
214 let SubtargetPredicate = HasTrue16BitInsts in {
215 defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
219 multiclass VOP2Inst_VOPD<string opName,
223 SDPatternOperator node = null_frag,
224 string revOp = opName,
225 bit GFX9Renamed = 0> :
226 VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
227 VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
228 VOP2Inst_sdwa<opName, P, GFX9Renamed> {
229 let renamedInGFX9 = GFX9Renamed in {
231 def _dpp : VOP2_DPP_Pseudo <opName, P>;
235 multiclass VOP2bInst <string opName,
237 SDPatternOperator node = null_frag,
238 string revOp = opName,
240 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
241 let renamedInGFX9 = GFX9Renamed in {
242 let SchedRW = [Write32Bit, WriteSALU] in {
243 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
244 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
245 Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
246 let usesCustomInserter = true;
250 def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
251 let AsmMatchConverter = "cvtSdwaVOP2b";
254 def _dpp : VOP2_DPP_Pseudo <opName, P>;
255 } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]
257 def _e64 : VOP3InstBase <opName, P, node, 1>,
258 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
260 let SubtargetPredicate = isGFX11Plus in {
261 if P.HasExtVOP3DPP then
262 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
263 } // End SubtargetPredicate = isGFX11Plus
// VOP2bInstAlias: assembler alias for a carry-out VOP2 instruction.
// The alias asm string is OpName, a space, and the profile's Asm32 string
// with every "vcc" replaced by `opnd`; it maps to `inst` with the vdst, src0
// and src1 operands of the pseudo's profile and uses inst's asm variant.
268 class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
269 string OpName, string opnd> :
270 InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
271 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
272 ps.Pfl.Src1RC32:$src1),
273 1, inst.AsmVariantName>,
// VOP2bInstAliases: emits one VOP2bInstAlias per wave size. Under isWave32
// the "vcc" text in the asm string becomes "vcc_lo"; under isWave64 it is
// substituted with "vcc", i.e. the string is left as written.
277 multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
278 let WaveSizePredicate = isWave32 in {
279 def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
281 let WaveSizePredicate = isWave64 in {
282 def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
287 VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
288 SDPatternOperator node, string revOp, bit useSGPRInput> {
290 let SchedRW = [Write32Bit] in {
291 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
292 if !eq(VOPDOp, -1) then
293 def _e32 : VOP2_Pseudo <opName, P>,
294 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
296 def _e32 : VOP2_Pseudo <opName, P>,
297 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
298 VOPD_Component<VOPDOp, VOPDName>;
301 def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
302 let AsmMatchConverter = "cvtSdwaVOP2e";
306 def _dpp : VOP2_DPP_Pseudo <opName, P>;
309 def _e64 : VOP3InstBase <opName, P, node, 1>,
310 Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
311 let isReMaterializable = 1;
314 let SubtargetPredicate = isGFX11Plus in {
315 if P.HasExtVOP3DPP then
316 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
317 } // End SubtargetPredicate = isGFX11Plus
322 VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
323 string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
324 : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;
327 VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
328 SDPatternOperator node = null_frag, string revOp = opName,
329 bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
330 : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;
// VOP2eInstAlias: assembler alias for a VOP2 instruction that accepts an
// extra trailing operand in its text form. The asm string is the pseudo's
// OpName, a space, the profile's Asm32 string, then ", " followed by `opnd`;
// the alias maps to `inst` with only vdst, src0 and src1 as operands.
332 class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
333 InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
334 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
335 ps.Pfl.Src1RC32:$src1),
336 1, inst.AsmVariantName>,
// VOP2e64InstAlias: assembler alias built from a VOP3 pseudo. The asm string
// is the pseudo's OpName, a space, and the profile's Asm64 string; the result
// maps to `inst` with vdst, sdst, src0, src1 and clamp operands.
339 class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
340 InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
341 (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
342 ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp),
343 1, inst.AsmVariantName>,
// VOP2eInstAliases: emits one VOP2eInstAlias per wave size, appending
// "vcc_lo" to the asm string under isWave32 and "vcc" under isWave64.
346 multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
347 let WaveSizePredicate = isWave32 in {
348 def : VOP2eInstAlias<ps, inst, "vcc_lo">;
350 let WaveSizePredicate = isWave64 in {
351 def : VOP2eInstAlias<ps, inst, "vcc">;
// VOP_MADK_Base: common base of the MADAK/MADMK profiles. All four value
// types of the profile are `vt`. AsmVOPDXDeferred is declared here without a
// value (`?`) so that derived classes can assign it with `let`.
355 class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
356 string AsmVOPDXDeferred = ?;
// VOP_MADAK: profile whose immediate ($imm) is the LAST source operand in
// both the operand lists and the asm strings.
359 class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
// Immediate operand type: KImmFP32 for a 32-bit vt, KImmFP16 otherwise.
360 field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
// src0 uses the f32 or f16 "Deferred" source operand depending on vt's size.
361 field dag Ins32 = !if(!eq(vt.Size, 32),
362 (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
363 (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
364 field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
365 // Note that both src0X and imm are deferred
366 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
367 field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
// Asm strings mirror the operand order above: dst, src0, src1, immediate.
369 field string Asm32 = "$vdst, $src0, $src1, $imm";
370 field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
371 let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
372 field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
373 field bit HasExt = 0;
377 def VOP_MADAK_F16 : VOP_MADAK <f16>;
378 def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
380 let DstRC = VOPDstOperand<VGPR_32_Lo128>;
381 let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
383 def VOP_MADAK_F32 : VOP_MADAK <f32>;
// VOP_MADMK: profile whose immediate ($imm) is the MIDDLE source operand,
// placed between src0 and src1 in both the operand lists and the asm strings.
385 class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
// Immediate operand type: KImmFP32 for a 32-bit vt, KImmFP16 otherwise.
386 field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
// src0 uses the f32 or f16 "Deferred" source operand depending on vt's size.
387 field dag Ins32 = !if(!eq(vt.Size, 32),
388 (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
389 (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
390 field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
391 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
392 field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
// Asm strings mirror the operand order above: dst, src0, immediate, src1.
394 field string Asm32 = "$vdst, $src0, $imm, $src1";
395 field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
396 let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
397 field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
398 field bit HasExt = 0;
402 def VOP_MADMK_F16 : VOP_MADMK <f16>;
403 def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
405 let DstRC = VOPDstOperand<VGPR_32_Lo128>;
406 let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
408 def VOP_MADMK_F32 : VOP_MADMK <f32>;
// getRegisterOperandForVT: helper whose `ret` wraps the result of
// getVregSrcForVT<VT> in a RegisterOperand.
410 class getRegisterOperandForVT<ValueType VT> {
411 RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>;
414 // FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
415 // and processing time but it makes it easier to convert to mad.
416 class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
417 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
418 let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3,
419 0, HasModifiers, HasModifiers, HasOMod,
420 Src0Mod, Src1Mod, Src2Mod>.ret;
421 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
422 Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
423 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
424 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
425 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
426 let InsDPP16 = !con(InsDPP, (ins FI:$fi));
427 let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
428 0, HasModifiers, HasModifiers, HasOMod,
429 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret;
430 // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
431 let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
432 let InsVOPDXDeferred =
433 (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
434 VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
435 let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
436 let InsVOPDYDeferred =
437 (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
438 VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);
440 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
441 Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
442 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
444 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
445 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
446 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
447 clampmod:$clamp, omod:$omod,
448 dst_sel:$dst_sel, dst_unused:$dst_unused,
449 src0_sel:$src0_sel, src1_sel:$src1_sel);
450 let Asm32 = getAsm32<1, 2, vt0>.ret;
451 let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
452 let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
453 let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
454 let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
455 let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
457 getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
458 HasOpSel, HasOMod, IsVOP3P, HasModifiers,
459 HasModifiers, HasModifiers,
460 0 /*Src2HasMods*/, DstVT>.ret;
466 let HasExt32BitDPP = 1;
469 let TieRegDPP = "$src2";
472 def VOP_MAC_F16 : VOP_MAC <f16>;
473 def VOP_MAC_F16_t16 : VOP_MAC <f16> {
476 let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
477 HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
478 let DstRC = VOPDstOperand<VGPR_32_Lo128>;
479 let DstRC64 = VOPDstOperand<VGPR_32>;
480 let Src1RC32 = VGPRSrc_32_Lo128;
481 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT_t16<Src2VT>.ret:$src2);
482 let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
483 let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
484 let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
485 let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
486 let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
487 let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
488 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
489 Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
490 getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
491 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
492 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
493 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
494 Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
495 getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
497 let Src2Mod = FP32InputMods; // dummy unused modifiers
498 let Src2RC64 = VGPRSrc_32; // stub argument
500 def VOP_MAC_F32 : VOP_MAC <f32>;
501 let HasExtDPP = 0, HasExt32BitDPP = 0 in
502 def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
503 let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
504 def VOP_MAC_F64 : VOP_MAC <f64>;
506 class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
513 def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
514 let Src0ModDPP = FPVRegInputMods;
515 let Src1ModDPP = FPVRegInputMods;
519 def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
520 let HasExtVOP3DPP = 0;
525 let Src0Mod = Int32InputMods;
526 let Src1Mod = Int32InputMods;
527 let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret,
528 3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
529 1 /*HasSrc2Mods*/, HasOMod,
530 Src0Mod, Src1Mod, Src2Mod>.ret;
531 let Asm64 = "$vdst, $src0, $src1$clamp";
534 // Write out to vcc or arbitrary SGPR.
535 def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
536 let Asm32 = "$vdst, vcc, $src0, $src1";
537 let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
538 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
539 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
540 let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
541 let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
542 let AsmDPP16 = AsmDPP#"$fi";
543 let InsDPP = (ins DstRCDPP:$old,
546 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
547 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
548 let InsDPP16 = !con(InsDPP, (ins FI:$fi));
549 let InsDPP8 = (ins DstRCDPP:$old,
553 let Outs32 = (outs DstRC:$vdst);
554 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
555 let OutsVOP3DPP = Outs64;
556 let OutsVOP3DPP8 = Outs64;
559 // Write out to vcc or arbitrary SGPR and read in from vcc or arbitrary SGPR.
561 def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
563 let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
564 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
565 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
566 let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
567 let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
568 let AsmDPP16 = AsmDPP#"$fi";
569 let Outs32 = (outs DstRC:$vdst);
570 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
571 let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
572 let OutsVOP3DPP = Outs64;
573 let OutsVOP3DPP8 = Outs64;
575 // Suppress src2 implied by type since the 32-bit encoding reads VCC implicitly.
577 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
579 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
580 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
582 dst_sel:$dst_sel, dst_unused:$dst_unused,
583 src0_sel:$src0_sel, src1_sel:$src1_sel);
585 let InsDPP = (ins DstRCDPP:$old,
588 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
589 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
590 let InsDPP16 = !con(InsDPP, (ins FI:$fi));
591 let InsDPP8 = (ins DstRCDPP:$old,
598 let HasExt32BitDPP = 1;
603 // Read in from vcc or arbitrary SGPR.
604 class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
605 let Asm32 = "$vdst, $src0, $src1";
606 let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
607 let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
608 let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
609 let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
610 let AsmDPP16 = AsmDPP#"$fi";
611 let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
613 let Outs32 = (outs DstRC:$vdst);
614 let Outs64 = (outs DstRC:$vdst);
616 // Suppress src2 implied by type since the 32-bit encoding reads VCC implicitly.
618 let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);
620 let HasModifiers = 1;
622 // Select FP modifiers for VOP3
623 let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
624 let Src1Mod = Src0Mod;
626 let HasSrc0IntMods = 0;
627 let HasSrc1IntMods = 0;
628 let HasSrc0FloatMods = 1;
629 let HasSrc1FloatMods = 1;
630 let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
631 FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
633 dst_sel:$dst_sel, dst_unused:$dst_unused,
634 src0_sel:$src0_sel, src1_sel:$src1_sel);
636 let InsDPP = (ins DstRCDPP:$old,
637 FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
638 FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
639 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
640 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
641 let InsDPP16 = !con(InsDPP, (ins FI:$fi));
642 let InsDPP8 = (ins DstRCDPP:$old,
643 FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
644 FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
647 let Src0ModVOP3DPP = FPVRegInputMods;
648 let Src1ModVOP3DPP = FPVRegInputMods;
652 let HasExt32BitDPP = 1;
657 def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
658 def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
// VOP_READLANE: profile for the read-lane operation. The result is written to
// an SReg_32 operand, and both the 32-bit and the 64-bit DPP extensions are
// switched off.
660 def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
661 let Outs32 = (outs SReg_32:$vdst);
663 let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
665 let Asm32 = " $vdst, $src0, $src1";
670 let HasExt32BitDPP = 0;
671 let HasExt64BitDPP = 0;
// VOP_WRITELANE: profile for the write-lane operation. Besides src0/src1 the
// 32-bit input list carries $vdst_in, a VGPR_32 input that does not appear in
// the asm string. Both the 32-bit and the 64-bit DPP extensions are switched
// off.
676 def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
677 let Outs32 = (outs VGPR_32:$vdst);
679 let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
681 let Asm32 = " $vdst, $src0, $src1";
688 let HasExt32BitDPP = 0;
689 let HasExt64BitDPP = 0;
694 //===----------------------------------------------------------------------===//
696 //===----------------------------------------------------------------------===//
698 let SubtargetPredicate = isGFX11Plus in
699 defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
700 defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
701 let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
702 def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
704 let isCommutable = 1 in {
705 let isReMaterializable = 1 in {
706 defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
707 defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
708 defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
709 defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
710 defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
711 defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
712 defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
713 defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
714 defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
715 defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
716 defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
717 defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
718 defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
719 defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
720 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
721 defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
722 defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
723 defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
724 defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
725 defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
726 defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
727 } // End isReMaterializable = 1
729 let mayRaiseFPException = 0 in {
730 let OtherPredicates = [HasMadMacF32Insts] in {
731 let Constraints = "$vdst = $src2", DisableEncoding="$src2",
732 isConvertibleToThreeAddress = 1 in {
733 defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
735 let SubtargetPredicate = isGFX6GFX7GFX10 in
736 defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
737 } // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
738 // isConvertibleToThreeAddress = 1
740 let isReMaterializable = 1 in
741 def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
742 } // End OtherPredicates = [HasMadMacF32Insts]
743 } // End mayRaiseFPException = 0
745 // No patterns so that the scalar instructions are always selected.
746 // The scalar versions will be replaced with vector when needed later.
747 defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
748 defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
749 defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
750 defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
751 defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
752 defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
755 let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
756 defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
757 defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
758 defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
761 } // End isCommutable = 1
763 // These are special and do not read the exec mask.
764 let isConvergent = 1, Uses = []<Register> in {
765 def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
766 [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
767 let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
768 def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
769 [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
770 } // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
771 } // End isConvergent = 1
773 let isReMaterializable = 1 in {
774 defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
775 defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
776 let IsNeverUniform = 1 in {
777 defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
778 defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
779 } // End IsNeverUniform = 1
780 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
782 let ReadsModeReg = 0, mayRaiseFPException = 0 in {
783 defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
784 defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
787 defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
788 defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
789 defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;
792 let SubtargetPredicate = isGFX6GFX7 in {
793 defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
794 defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
795 } // End SubtargetPredicate = isGFX6GFX7
797 let isCommutable = 1 in {
798 let SubtargetPredicate = isGFX6GFX7 in {
799 defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
800 defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
801 defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
802 } // End SubtargetPredicate = isGFX6GFX7
803 } // End isCommutable = 1
804 } // End isReMaterializable = 1
806 defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
808 class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
810 (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
811 !if(!cast<Commutable_REV>(Inst).IsOrig,
817 class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
819 (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
820 !if(!cast<Commutable_REV>(Inst).IsOrig,
821 (Inst $src0, $src1, 0),
822 (Inst $src1, $src0, 0)
826 def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
827 def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
828 def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;
830 let SubtargetPredicate = HasAddNoCarryInsts in {
831 def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
832 def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
835 let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
836 def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
837 def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
840 def : DivergentBinOp<adde, V_ADDC_U32_e32>;
841 def : DivergentBinOp<sube, V_SUBB_U32_e32>;
843 class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
845 (DivergentBinFrag<Op> i64:$src0, i64:$src1),
846 (REG_SEQUENCE VReg_64,
848 (i32 (EXTRACT_SUBREG $src0, sub0)),
849 (i32 (EXTRACT_SUBREG $src1, sub0))
852 (i32 (EXTRACT_SUBREG $src0, sub1)),
853 (i32 (EXTRACT_SUBREG $src1, sub1))
858 def : divergent_i64_BinOp <and, V_AND_B32_e64>;
859 def : divergent_i64_BinOp <or, V_OR_B32_e64>;
860 def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
862 //===----------------------------------------------------------------------===//
863 // 16-Bit Operand Instructions
864 //===----------------------------------------------------------------------===//
866 // The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
867 // encoding treats src1 as an f16
// LDEXP_F16_VOPProfile: profile whose value types are all f16 but whose src1
// is given integer input modifiers in every variant set here (VOP3, DPP,
// VOP3 DPP and SDWA), with float modifiers on src1 disabled.
868 def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
869 let Src1Mod = Int32InputMods;
870 let Src1ModDPP = IntVRegInputMods;
871 let Src1ModVOP3DPP = IntVRegInputMods;
872 // SDWA sext is the only modifier allowed.
873 let HasSrc1IntMods = 1;
874 let HasSrc1FloatMods = 0;
875 let Src1ModSDWA = Int16SDWAInputMods;
877 def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
878 let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
879 let Src1DPP = VGPR_32_Lo128;
880 let Src1ModDPP = IntT16VRegInputMods;
// 16-bit VOP2 instruction definitions. Every instruction defined in this
// block is marked isReMaterializable.
883 let isReMaterializable = 1 in {
884 let FPDPRounding = 1 in {
// v_ldexp_f16 is defined twice and selected by predicate: the regular profile
// when True16 instructions are absent, the _t16 profile when they are present.
885 let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in
886 defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
887 let SubtargetPredicate = HasTrue16BitInsts in
888 defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
889 } // End FPDPRounding = 1
890 // FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions
891 defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
892 defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
893 defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
// Everything below is additionally marked commutable.
894 let isCommutable = 1 in {
895 let FPDPRounding = 1 in {
896 defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
897 defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
// V_SUBREV_F16 passes null_frag as its operator and names "v_sub_f16" as the
// last argument (presumably its reversed-operand counterpart - confirm against
// VOP2Inst_t16).
898 defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
899 defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
900 } // End FPDPRounding = 1
901 defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
902 defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
903 defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
904 defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>;
905 defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>;
906 defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>;
907 defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
908 } // End isCommutable = 1
909 } // End isReMaterializable = 1
// Selection pattern for an ldexp-style operator `op` on instruction `inst`.
// The profile P defaults to inst.Pfl. src0 is matched through VOP3Mods0
// (capturing source modifiers, clamp and omod); src1 is matched as an i16
// through VOP3Mods0 with its own modifiers. The output feeds the captured
// modifiers and sources to `inst`.
911 class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
912 (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
913 (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
914 (inst $src0_modifiers, $src0,
915 $src1_modifiers, $src1,
// any_fldexp is selected to the regular e64 instruction when True16
// instructions are absent, and to the _t16 e64 instruction when present.
920 let OtherPredicates = [NotHasTrue16BitInsts] in
921 def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
923 let OtherPredicates = [HasTrue16BitInsts] in
924 def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
// 16-bit and/or/xor on the True16 I16 profile, defined through VOP2Inst_e64.
// All three are commutable and guarded by isGFX11Plus.
926 let SubtargetPredicate = isGFX11Plus in {
927 let isCommutable = 1 in {
928 defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
929 defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
930 defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
931 } // End isCommutable = 1
932 } // End SubtargetPredicate = isGFX11Plus
// f16 fmamk/fmaak pseudos, created with an empty pattern list and an empty
// name suffix. Each exists in two variants: one for isGFX10Plus without
// True16 instructions and a _t16 one for HasTrue16BitInsts.
934 let FPDPRounding = 1, isReMaterializable = 1 in {
935 let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
936 def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
938 let SubtargetPredicate = HasTrue16BitInsts in {
939 def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
// Only the fmaak variants are marked commutable.
942 let isCommutable = 1 in {
943 let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
944 def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
946 let SubtargetPredicate = HasTrue16BitInsts in {
947 def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
949 } // End isCommutable = 1
950 } // End FPDPRounding = 1, isReMaterializable = 1
// v_fmac_f16 in regular and _t16 variants. $vdst is tied to $src2 and $src2
// is excluded from the encoding; the instructions are commutable and
// convertible to three-address form.
952 let Constraints = "$vdst = $src2",
953 DisableEncoding="$src2",
954 isConvertibleToThreeAddress = 1,
955 isCommutable = 1 in {
956 let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
957 defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
959 let SubtargetPredicate = HasTrue16BitInsts in {
960 defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
962 } // End FMAC Constraints
// Instructions guarded by Has16BitInsts: the f16 madmk/madak pseudos, the
// GFX8/GFX9 16-bit integer add/sub family, and v_mac_f16.
964 let SubtargetPredicate = Has16BitInsts in {
965 let isReMaterializable = 1 in {
966 let FPDPRounding = 1 in {
967 def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
968 } // End FPDPRounding = 1
969 let isCommutable = 1 in {
// V_MADAK_F16 explicitly clears mayRaiseFPException.
970 let mayRaiseFPException = 0 in {
971 def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
// 16-bit integer add/sub exist as VOP2 only under isGFX8GFX9.
973 let SubtargetPredicate = isGFX8GFX9 in {
974 defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
975 defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
976 defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
978 } // End isCommutable = 1
979 } // End isReMaterializable = 1
981 // FIXME: Missing FPDPRounding
// v_mac_f16 ties $vdst to $src2 and keeps $src2 out of the encoding.
982 let Constraints = "$vdst = $src2", DisableEncoding="$src2",
983 isConvertibleToThreeAddress = 1, isCommutable = 1 in {
984 defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
986 } // End SubtargetPredicate = Has16BitInsts
// Instructions and patterns guarded by HasDLInsts: v_xnor_b32 with its
// selection patterns, and v_fmac_f32.
989 let SubtargetPredicate = HasDLInsts in {
991 let isReMaterializable = 1 in
992 defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
// i32: not(xor(a, b)) and xor(not(a), b) both select to V_XNOR_B32_e64.
995 (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
996 (i32 (V_XNOR_B32_e64 $src0, $src1))
1000 (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
1001 (i32 (V_XNOR_B32_e64 $src0, $src1))
// i64: the same two shapes are split into sub0/sub1 halves, each half goes
// through V_XNOR_B32_e64, and the halves are rejoined into a VReg_64.
1005 (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
1006 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
1007 (i32 (EXTRACT_SUBREG $src0, sub0)),
1008 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
1009 (i32 (V_XNOR_B32_e64
1010 (i32 (EXTRACT_SUBREG $src0, sub1)),
1011 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
1015 (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
1016 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
1017 (i32 (EXTRACT_SUBREG $src0, sub0)),
1018 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
1019 (i32 (V_XNOR_B32_e64
1020 (i32 (EXTRACT_SUBREG $src0, sub1)),
1021 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
// v_fmac_f32 ties $vdst to $src2, keeps $src2 out of the encoding, and is
// defined through VOP2Inst_VOPD with VOPD opcode 0x0.
1024 let Constraints = "$vdst = $src2",
1025 DisableEncoding = "$src2",
1026 isConvertibleToThreeAddress = 1,
1028 defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
1029 } // End SubtargetPredicate = HasDLInsts
// v_fmac_legacy_f32, guarded by HasFmaLegacy32. $vdst is tied to $src2 and
// $src2 is kept out of the encoding.
1031 let SubtargetPredicate = HasFmaLegacy32 in {
1033 let Constraints = "$vdst = $src2",
1034 DisableEncoding = "$src2",
1035 isConvertibleToThreeAddress = 1,
1037 defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
1039 } // End SubtargetPredicate = HasFmaLegacy32
// v_fmac_f64, guarded by HasFmacF64Inst and scheduled as WriteDoubleAdd.
// $vdst is tied to $src2 and $src2 is kept out of the encoding.
1041 let SubtargetPredicate = HasFmacF64Inst,
1042 Constraints = "$vdst = $src2",
1043 DisableEncoding="$src2",
1044 isConvertibleToThreeAddress = 1,
1046 SchedRW = [WriteDoubleAdd] in
1047 defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;
// Accumulating dot-product instructions. $vdst is tied to $src2 for all of
// them; each one is guarded by its own HasDotNInsts predicate.
1049 let Constraints = "$vdst = $src2",
1050 DisableEncoding="$src2",
1051 isConvertibleToThreeAddress = 1,
// V_DOT2C_F32_F16 also gets a VOPD component (opcode 0xc) under the name
// "v_dot2acc_f32_f16".
1054 let SubtargetPredicate = HasDot5Insts in
1055 defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
1056 let SubtargetPredicate = HasDot6Insts in
1057 defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
1059 let SubtargetPredicate = HasDot4Insts in
1060 defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
1061 let SubtargetPredicate = HasDot3Insts in
1062 defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
// Selection patterns for the dot-product nodes/intrinsics whose clamp operand
// is DSTCLAMP.NONE. Each maps onto the e32 accumulating instruction and uses
// the same SubtargetPredicate as that instruction. AddedComplexity = 30
// applies to all four.
1065 let AddedComplexity = 30 in {
1067 (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
1068 (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
1070 let SubtargetPredicate = HasDot5Insts;
1073 (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
1074 (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
1076 let SubtargetPredicate = HasDot6Insts;
1079 (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
1080 (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
1082 let SubtargetPredicate = HasDot4Insts;
1085 (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
1086 (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
1088 let SubtargetPredicate = HasDot3Insts;
1090 } // End AddedComplexity = 30
// f32 fmamk/fmaak pseudos, guarded by HasFmaakFmamkF32Insts and marked
// rematerializable. Both are also VOPD components (opcodes 0x2 and 0x1);
// only fmaak is marked commutable.
1092 let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
1093 def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
1095 let isCommutable = 1 in
1096 def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
// Packed f16 fmac on the V2F16 profile, guarded by HasPkFmacF16Inst.
1099 let SubtargetPredicate = HasPkFmacF16Inst in {
1100 defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
1101 } // End SubtargetPredicate = HasPkFmacF16Inst
1103 // Note: 16-bit instructions produce a 0 result in the high 16-bits
1104 // on GFX8 and GFX9 and preserve high 16 bits on GFX10+
// Patterns that fold a zext of a 16-bit `op` result into `inst` itself.
// For an i32 result the instruction output is used directly; for an i64
// result it becomes sub0 of a VReg_64 whose sub1 is a V_MOV_B32 of 0.
1105 multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {
1108 (i32 (zext (op i16:$src0, i16:$src1))),
1109 (inst VSrc_b16:$src0, VSrc_b16:$src1)
1113 (i64 (zext (op i16:$src0, i16:$src1))),
1114 (REG_SEQUENCE VReg_64,
1115 (inst $src0, $src1), sub0,
1116 (V_MOV_B32_e32 (i32 0)), sub1)
// Extends an i1 to i16 with V_CNDMASK_B32_e64, using the constants 0 (src0)
// and 1 (src1) with zero source modifiers. `ext` is the extension node to
// match.
// NOTE(review): the select operand is on a line not visible here; presumably
// $src - confirm.
1120 class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
1121 (i16 (ext i1:$src)),
1122 (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
1123 (i32 0/*src1mod*/), (i32 1/*src1*/),
// For both i16 and v2i16, and/or/xor are selected to the 32-bit
// V_AND_B32 / V_OR_B32 / V_XOR_B32 e64 instructions.
1127 foreach vt = [i16, v2i16] in {
1129 (and vt:$src0, vt:$src1),
1130 (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1134 (or vt:$src0, vt:$src1),
1135 (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1139 (xor vt:$src0, vt:$src1),
1140 (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
// 16-bit integer patterns that apply only with Has16BitInsts on GFX8/GFX9.
1144 let Predicates = [Has16BitInsts, isGFX8GFX9] in {
1146 // Undo sub x, c -> add x, -c canonicalization since c is more likely
1147 // an inline immediate than -c.
1148 // TODO: Also do for 64-bit.
1150 (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
1151 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
// Same rewrite when the add result is zero-extended to i32.
1155 (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
1156 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
// zext-folding patterns for each 16-bit arithmetic, min/max and shift op.
1159 defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
1160 defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
1161 defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
1162 defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
1163 defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
1164 defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
1165 defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
1166 defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
1167 defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
1168 defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
1170 } // End Predicates = [Has16BitInsts, isGFX8GFX9]
// i1 -> i16 extension patterns. zext and anyext reuse ZExt_i16_i1_Pat; sext
// selects between 0 and -1 with V_CNDMASK_B32_e64 on $src.
1172 let Predicates = [Has16BitInsts] in {
1174 def : ZExt_i16_i1_Pat<zext>;
1175 def : ZExt_i16_i1_Pat<anyext>;
1178 (i16 (sext i1:$src)),
1179 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
1180 /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
1183 } // End Predicates = [Has16BitInsts]
// Saturating unsigned add/sub patterns built with VOPBinOpClampPat.
1186 let SubtargetPredicate = HasIntClamp in {
1187 // Set clamp bit for saturation.
1188 def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
1189 def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
// With HasAddNoCarryInsts the no-carry instructions are given a higher
// AddedComplexity so they win over the carry-out forms above.
1192 let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
1193 let AddedComplexity = 1 in { // Prefer over form with carry-out.
1194 def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
1195 def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
// i16 saturating add/sub on the 16-bit instructions.
1199 let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
1200 def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
1201 def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
1204 //===----------------------------------------------------------------------===//
1206 //===----------------------------------------------------------------------===//
// Encoding class for the DPP form of a VOP2 instruction. It copies
// hasSideEffects and SchedRW from the pseudo `ps`, puts the constant 0xfa in
// Inst{8-0}, src1 and vdst (or 0 when the profile lacks them) in the next two
// fields, and the 6-bit opcode in Inst{30-25}.
1208 class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
1209 string opName = ps.OpName, VOPProfile p = ps.Pfl,
1211 VOP_DPP<opName, p, IsDPP16> {
1212 let hasSideEffects = ps.hasSideEffects;
1214 let SchedRW = ps.SchedRW;
1219 let Inst{8-0} = 0xfa;
1220 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
1221 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
1222 let Inst{30-25} = op;
// VOP2_DPP with its last template argument fixed to 1. Assembler and
// subtarget predicates are both HasDPP16; OtherPredicates come from the
// pseudo.
1226 class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
1227 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
1228 VOP2_DPP<op, ps, opName, p, 1> {
1229 let AssemblerPredicate = HasDPP16;
1230 let SubtargetPredicate = HasDPP16;
1231 let OtherPredicates = ps.OtherPredicates;
// Base_VOP2_DPP16 combined with SIMCInstr, linking the real instruction to
// the pseudo's PseudoInstr for the given `subtarget` encoding family.
1234 class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
1235 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
1236 Base_VOP2_DPP16<op, ps, opName, p>,
1237 SIMCInstr <ps.PseudoInstr, subtarget>;
// Encoding class for the DPP8 form of a VOP2 instruction. It copies
// hasSideEffects, SchedRW and OtherPredicates from the pseudo `ps`, and
// encodes src1, vdst (or 0 when the profile lacks them) and the 6-bit opcode.
1239 class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
1240 VOPProfile p = ps.Pfl> :
1241 VOP_DPP8<ps.OpName, p> {
1242 let hasSideEffects = ps.hasSideEffects;
1244 let SchedRW = ps.SchedRW;
1251 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
1252 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
1253 let Inst{30-25} = op;
1256 let OtherPredicates = ps.OtherPredicates;
1259 //===----------------------------------------------------------------------===//
1261 //===----------------------------------------------------------------------===//
1263 let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
1264 //===------------------------------- VOP2 -------------------------------===//
// GFX11 real for a MADK-style pseudo named NAME, using the VOP2_MADKe
// encoding with the pseudo's profile.
1265 multiclass VOP2Only_Real_MADK_gfx11<bits<6> op> {
1267 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX11>,
1268 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
// Same, but the pseudo is looked up by opName (default NAME) and the
// assembly string is asmName followed by the pseudo's operand string.
1270 multiclass VOP2Only_Real_MADK_gfx11_with_name<bits<6> op, string asmName,
1271 string opName = NAME> {
1273 VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX11>,
1274 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
1275 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
1276 let AsmString = asmName # ps.AsmOperands;
// GFX11 real for the NAME#"_e32" pseudo in the 32-bit VOP2e encoding.
1279 multiclass VOP2_Real_e32_gfx11<bits<6> op> {
1281 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX11>,
1282 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
// Wrapper that instantiates VOP2_Real_e32_gfx11.
1284 multiclass VOP2Only_Real_e32_gfx11<bits<6> op> {
1286 defm NAME: VOP2_Real_e32_gfx11<op>;
// GFX11 real for the NAME#"_e64" pseudo. The 10-bit VOP3 opcode is the
// 6-bit VOP2 opcode prefixed with the bits {0, 1, 0, 0}.
1288 multiclass VOP2_Real_e64_gfx11<bits<6> op> {
1290 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
1291 VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
// GFX11 DPP16 real, emitted only when the e32 pseudo's profile has
// HasExtDPP. It is decoded from the "DPPGFX11" namespace.
1293 multiclass VOP2_Real_dpp_gfx11<bits<6> op> {
1294 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
1295 def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> {
1296 let DecoderNamespace = "DPPGFX11";
// GFX11 DPP8 real under the same HasExtDPP condition, decoded from
// "DPP8GFX11".
1299 multiclass VOP2_Real_dpp8_gfx11<bits<6> op> {
1300 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
1301 def _dpp8_gfx11 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
1302 let DecoderNamespace = "DPP8GFX11";
1306 //===------------------------- VOP2 (with name) -------------------------===//
// GFX11 e32 real for the pseudo opName#"_e32", printed as asmName followed by
// the pseudo's operand string. `single` is forwarded to IsSingle.
1307 multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
1308 string asmName, bit single = 0> {
1309 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1311 VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
1312 VOP2e<op{5-0}, ps.Pfl> {
1313 let AsmString = asmName # ps.AsmOperands;
1314 let IsSingle = single;
// GFX11 e64 real for the pseudo opName#"_e64", renamed the same way.
1317 multiclass VOP2_Real_e64_with_name_gfx11<bits<6> op, string opName,
1319 defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1321 VOP3_Real<ps, SIEncodingFamily.GFX11>,
1322 VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
1323 let AsmString = asmName # ps.AsmOperands;
// Renamed GFX11 DPP16 real: emitted only when the e32 pseudo's profile has
// HasExtDPP; the assembly string is asmName plus the profile's AsmDPP16.
1327 multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName,
1329 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1330 if ps.Pfl.HasExtDPP then
1331 def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
1332 SIEncodingFamily.GFX11> {
1333 let AsmString = asmName # ps.Pfl.AsmDPP16;
1334 let DecoderNamespace = "DPPGFX11";
// Renamed GFX11 DPP8 real, using the profile's AsmDPP8.
1337 multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
1339 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1340 if ps.Pfl.HasExtDPP then
1341 def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
1342 let AsmString = asmName # ps.Pfl.AsmDPP8;
1343 let DecoderNamespace = "DPP8GFX11";
1347 //===------------------------------ VOP2be ------------------------------===//
// GFX11 e32 real for a VOP2be pseudo (opName#"_e32"). The assembly string is
// asmName plus the pseudo's operand string with every ", vcc" removed.
1348 multiclass VOP2be_Real_e32_gfx11<bits<6> op, string opName, string asmName> {
1349 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1351 VOP2_Real<ps, SIEncodingFamily.GFX11>,
1352 VOP2e<op{5-0}, ps.Pfl> {
1353 let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
// GFX11 DPP16 reals for a VOP2be instruction, all conditional on HasExtDPP.
// Three variants are produced from the profile's AsmDPP16 string:
//  - one with ", vcc" stripped, decoded from "DPPGFX11";
//  - a wave32 asm-parser-only variant with "vcc" rewritten to "vcc_lo";
//  - a wave64 asm-parser-only variant with the string unchanged.
1356 multiclass VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> {
1357 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
1359 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> {
1360 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1361 let AsmString = asmName # !subst(", vcc", "", AsmDPP);
1362 let DecoderNamespace = "DPPGFX11";
1364 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
1365 def _dpp_w32_gfx11 :
1366 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1367 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1368 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
1369 let isAsmParserOnly = 1;
1370 let WaveSizePredicate = isWave32;
1372 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
1373 def _dpp_w64_gfx11 :
1374 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1375 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1376 let AsmString = asmName # AsmDPP;
1377 let isAsmParserOnly = 1;
1378 let WaveSizePredicate = isWave64;
// GFX11 DPP8 reals for a VOP2be instruction, all conditional on HasExtDPP.
// Three variants are produced from the profile's AsmDPP8 string:
//  - one with ", vcc" stripped, decoded from "DPP8GFX11";
//  - a wave32 asm-parser-only variant with "vcc" rewritten to "vcc_lo";
//  - a wave64 asm-parser-only variant with the string unchanged.
1381 multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> {
1382 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
1384 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1385 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1386 let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
1387 let DecoderNamespace = "DPP8GFX11";
1389 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
1390 def _dpp8_w32_gfx11 :
1391 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1392 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1393 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
1394 let isAsmParserOnly = 1;
1395 let WaveSizePredicate = isWave32;
1397 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
1398 def _dpp8_w64_gfx11 :
1399 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1400 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1401 let AsmString = asmName # AsmDPP8;
1402 let isAsmParserOnly = 1;
1403 let WaveSizePredicate = isWave64;
1407 } // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
1409 // We don't want to override separate decoderNamespaces within these
// VOP3 "realtriple" for a VOP2 opcode: forwards to VOP3_Realtriple_gfx11 with
// the opcode prefixed by {0, 1, 0, 0} and isSingle = 0.
1410 multiclass VOP2_Realtriple_e64_gfx11<bits<6> op> {
1411 defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
// Renaming variant that forwards opName and asmName to
// VOP3_Realtriple_with_name_gfx11.
1413 multiclass VOP2_Realtriple_e64_with_name_gfx11<bits<6> op, string opName,
1415 defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName> ;
// Full GFX11 real set for a VOP2be instruction: e32, the VOP3be realtriple,
// and the DPP16 / DPP8 variants.
1418 multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
1419 VOP2be_Real_e32_gfx11<op, opName, asmName>,
1420 VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
1421 VOP2be_Real_dpp_gfx11<op, opName, asmName>,
1422 VOP2be_Real_dpp8_gfx11<op, opName, asmName>;
// Full GFX11 real set for a VOP2e instruction: the plain e32 and e64
// realtriple, plus the VOP2be-style DPP16 / DPP8 variants.
1425 multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
1426 VOP2_Real_e32_gfx11<op>,
1427 VOP2_Realtriple_e64_gfx11<op>,
1428 VOP2be_Real_dpp_gfx11<op, opName, asmName>,
1429 VOP2be_Real_dpp8_gfx11<op, opName, asmName>;
// e32 + DPP16 + DPP8 reals, with e32 going through the VOP2Only wrapper.
1431 multiclass VOP2Only_Real_gfx11<bits<6> op> :
1432 VOP2Only_Real_e32_gfx11<op>,
1433 VOP2_Real_dpp_gfx11<op>,
1434 VOP2_Real_dpp8_gfx11<op>;
// e32 + DPP16 + DPP8 reals, without any VOP3 form.
1436 multiclass VOP2_Real_NO_VOP3_gfx11<bits<6> op> :
1437 VOP2_Real_e32_gfx11<op>, VOP2_Real_dpp_gfx11<op>, VOP2_Real_dpp8_gfx11<op>;
// The NO_VOP3 set plus the e64 realtriple.
1439 multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
1440 VOP2_Realtriple_e64_gfx11<op>, VOP2_Real_NO_VOP3_gfx11<op>;
// Renamed e32 + DPP16 + DPP8 reals. Also defines a MnemonicAlias from the
// pseudo's mnemonic to asmName, required on isGFX11Plus.
1442 multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
1443 string asmName, bit isSingle = 0> {
1445 defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName, isSingle>,
1446 VOP2_Real_dpp_with_name_gfx11<op, opName, asmName>,
1447 VOP2_Real_dpp8_with_name_gfx11<op, opName, asmName>;
1448 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1449 def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
// The renamed NO_VOP3 set plus the renamed e64 realtriple.
1452 multiclass VOP2_Real_FULL_with_name_gfx11<bits<6> op, string opName,
1454 VOP2_Realtriple_e64_with_name_gfx11<op, opName, asmName>,
1455 VOP2_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>;
// Convenience form for _t16 instructions: opName defaults to NAME and only
// the assembly name has to be given.
1457 multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME>
1458 : VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;
// e32 + e64 reals only, with no DPP variants.
1460 multiclass VOP2_Real_NO_DPP_gfx11<bits<6> op> :
1461 VOP2_Real_e32_gfx11<op>, VOP2_Real_e64_gfx11<op>;
// Renamed e32 + e64 reals with no DPP variants, plus a MnemonicAlias from the
// pseudo's mnemonic to asmName.
1463 multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
1465 defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName>,
1466 VOP2_Real_e64_with_name_gfx11<op, opName, asmName>;
1467 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1468 def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
// GFX11 VOP2 real instantiations. The first template argument is the VOP2
// opcode; the *_with_name forms bind a GFX11 mnemonic to an existing pseudo.
1471 defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32",
1473 defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002,
1474 "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
1475 defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006,
1476 "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
1477 defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11<0x007,
1478 "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
1479 defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11<0x018>;
1480 defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11<0x019>;
1481 defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11<0x01a>;
// Carry-in add/sub reuse the V_ADDC/V_SUBB/V_SUBBREV pseudos under the
// v_*_co_ci_u32 mnemonics.
1482 defm V_ADD_CO_CI_U32 :
1483 VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
1484 defm V_SUB_CO_CI_U32 :
1485 VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
1486 defm V_SUBREV_CO_CI_U32 :
1487 VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
1489 defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
1490 "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
1491 defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;
// _t16 pseudos are printed without the _t16 suffix.
1493 defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
1494 defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
1495 defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
1496 defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
1497 defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
1498 defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
1499 defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
1500 defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
1501 defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
1502 defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;
// GFX11 reals that exist only in VOP3 form; the argument is the full VOP3
// opcode. The *_with_name forms bind a GFX11 mnemonic to an existing pseudo.
1505 defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>;
1506 defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>;
1507 defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>;
1508 defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>;
1509 defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>;
1510 defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>;
1511 defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
1512 defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
1513 defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>;
1514 defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>;
1515 defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>;
1516 defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>;
1517 defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>;
// Instruction aliases tying the e32 pseudos to their GFX11 reals, guarded by
// isGFX11Plus. The VOP2b aliases also pass the GFX11 mnemonic.
1519 let SubtargetPredicate = isGFX11Plus in {
1520 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;
1522 defm : VOP2bInstAliases<
1523 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">;
1524 defm : VOP2bInstAliases<
1525 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
1526 defm : VOP2bInstAliases<
1527 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
1528 } // End SubtargetPredicate = isGFX11Plus
1530 //===----------------------------------------------------------------------===//
1532 //===----------------------------------------------------------------------===//
1534 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
1535 //===------------------------------- VOP2 -------------------------------===//
// GFX10 real for a MADK-style pseudo named NAME, using the VOP2_MADKe
// encoding with the pseudo's profile.
1536 multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
1538 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
1539 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
// Same, but the pseudo is looked up by opName and the assembly string is
// asmName followed by the pseudo's operand string.
1541 multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
1544 VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
1545 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
1546 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
1547 let AsmString = asmName # ps.AsmOperands;
// GFX10 real for the NAME#"_e32" pseudo in the 32-bit VOP2e encoding.
1550 multiclass VOP2_Real_e32_gfx10<bits<6> op> {
1552 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
1553 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
// GFX10 real for the NAME#"_e64" pseudo. The 10-bit VOP3 opcode is the
// 6-bit VOP2 opcode prefixed with the bits {0, 1, 0, 0}.
1555 multiclass VOP2_Real_e64_gfx10<bits<6> op> {
1557 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1558 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
// GFX10 SDWA real, emitted only when the e32 pseudo's profile has
// HasExtSDWA9. It uses the VOP2_SDWA9Ae encoding and the "SDWA10" decoder
// namespace.
1560 multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
1561 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
1563 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
1564 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
1565 let DecoderNamespace = "SDWA10";
// GFX10 DPP16 real, emitted only when the profile has HasExt32BitDPP.
// NOTE(review): the DPP16 real is placed in the "SDWA10" decoder namespace,
// the same one the SDWA real uses - confirm this sharing is intentional.
1568 multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
1569 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
1570 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
1571 let DecoderNamespace = "SDWA10";
// GFX10 DPP8 real under the same HasExt32BitDPP condition, decoded from
// "DPP8".
1574 multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
1575 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
1576 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
1577 let DecoderNamespace = "DPP8";
1581 //===------------------------- VOP2 (with name) -------------------------===//
// GFX10 e32 real for the pseudo opName#"_e32", printed as asmName followed by
// the pseudo's operand string.
1582 multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
1585 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
1586 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
1587 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1588 let AsmString = asmName # ps.AsmOperands;
// GFX10 e64 real for the pseudo opName#"_e64", renamed the same way.
1591 multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
1594 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1595 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
1596 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1597 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
1598 let AsmString = asmName # ps.AsmOperands;
// Renamed GFX10 SDWA / DPP16 / DPP8 reals. The enclosing let puts them in the
// "SDWA10" decoder namespace; the DPP8 real overrides that with "DPP8".
1601 let DecoderNamespace = "SDWA10" in {
// SDWA: emitted only with HasExtSDWA9; asm string is asmName plus the SDWA
// pseudo's operand string.
1602 multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
1604 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
1606 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1607 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1608 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1609 let AsmString = asmName # ps.AsmOperands;
// DPP16: emitted only with HasExt32BitDPP; uses the profile's AsmDPP16.
1612 multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
1614 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1615 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
1616 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1617 let AsmString = asmName # ps.Pfl.AsmDPP16;
// DPP8: emitted only with HasExt32BitDPP; uses the profile's AsmDPP8.
1620 multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
1622 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1623 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1624 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1625 let AsmString = asmName # ps.Pfl.AsmDPP8;
1626 let DecoderNamespace = "DPP8";
1629 } // End DecoderNamespace = "SDWA10"
1631 //===------------------------------ VOP2be ------------------------------===//
// GFX10 e32 real for a VOP2be pseudo. The assembly string is asmName plus the
// pseudo's operand string with every ", vcc" removed.
1632 multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
1634 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
1635 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
1636 VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
1637 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
// GFX10 e64 real for a VOP2be pseudo in the VOP3be encoding; the operand
// string is used unchanged.
1640 multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
1642 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1643 VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
1644 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1645 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
1646 let AsmString = asmName # Ps.AsmOperands;
// GFX10 SDWA reals for a VOP2be instruction, all conditional on HasExtSDWA9
// and all in the "SDWA10" decoder namespace. Three variants are produced from
// the SDWA pseudo's operand string:
//  - one with ", vcc" stripped;
//  - a wave32 asm-parser-only variant with "vcc" rewritten to "vcc_lo";
//  - a wave64 asm-parser-only variant with the string unchanged.
1649 multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
1650 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
1652 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1653 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1654 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1655 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
1656 let DecoderNamespace = "SDWA10";
1658 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
1659 def _sdwa_w32_gfx10 :
1660 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1661 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1662 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1663 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
1664 let isAsmParserOnly = 1;
1665 let DecoderNamespace = "SDWA10";
1666 let WaveSizePredicate = isWave32;
1668 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
1669 def _sdwa_w64_gfx10 :
1670 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1671 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1672 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1673 let AsmString = asmName # Ps.AsmOperands;
1674 let isAsmParserOnly = 1;
1675 let DecoderNamespace = "SDWA10";
1676 let WaveSizePredicate = isWave64;
// GFX10 DPP16 reals for a VOP2be instruction, all conditional on
// HasExt32BitDPP. Three variants are produced from the profile's AsmDPP16:
//  - one with ", vcc" stripped, in the "SDWA10" decoder namespace;
//  - a wave32 asm-parser-only variant with "vcc" rewritten to "vcc_lo";
//  - a wave64 asm-parser-only variant with the string unchanged.
1679 multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
1680 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1682 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
1683 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1684 let AsmString = asmName # !subst(", vcc", "", AsmDPP);
1685 let DecoderNamespace = "SDWA10";
1687 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1688 def _dpp_w32_gfx10 :
1689 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1690 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1691 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
1692 let isAsmParserOnly = 1;
1693 let WaveSizePredicate = isWave32;
1695 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1696 def _dpp_w64_gfx10 :
1697 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1698 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1699 let AsmString = asmName # AsmDPP;
1700 let isAsmParserOnly = 1;
1701 let WaveSizePredicate = isWave64;
// GFX10 DPP8 reals for a VOP2be instruction, all conditional on
// HasExt32BitDPP. Three variants are produced from the profile's AsmDPP8:
//  - one with ", vcc" stripped, in the "DPP8" decoder namespace;
//  - a wave32 asm-parser-only variant with "vcc" rewritten to "vcc_lo";
//  - a wave64 asm-parser-only variant with the string unchanged.
1704 multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
1705 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1707 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1708 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1709 let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
1710 let DecoderNamespace = "DPP8";
1712 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1713 def _dpp8_w32_gfx10 :
1714 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1715 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1716 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
1717 let isAsmParserOnly = 1;
1718 let WaveSizePredicate = isWave32;
1720 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1721 def _dpp8_w64_gfx10 :
1722 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1723 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1724 let AsmString = asmName # AsmDPP8;
1725 let isAsmParserOnly = 1;
1726 let WaveSizePredicate = isWave64;
1730 //===----------------------------- VOP3Only -----------------------------===//
// Builds a GFX10 VOP3 real from the NAME#"_e64" pseudo, encoded through
// VOP3e_gfx10 with the 10-bit opcode `op` and that pseudo's profile.
1731 multiclass VOP3Only_Real_gfx10<bits<10> op> {
1733 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1734 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
1739 //===---------------------------- VOP3beOnly ----------------------------===//
// Builds a GFX10 VOP3 real from the NAME#"_e64" pseudo, encoded through
// VOP3be_gfx10 with the 10-bit opcode `op` and that pseudo's profile.
1740 multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
1742 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1743 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
1747 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
// Instantiates both the GFX10 and the GFX11 MADK reals with the same `op`.
1749 multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
1750 VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK_gfx11<op>;
// Full GFX10 set for a VOP2be instruction: combines the e32, e64, SDWA, DPP
// and DPP8 VOP2be multiclasses, forwarding `op`, `opName` and `asmName`.
1752 multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
1753 VOP2be_Real_e32_gfx10<op, opName, asmName>,
1754 VOP2be_Real_e64_gfx10<op, opName, asmName>,
1755 VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
1756 VOP2be_Real_dpp_gfx10<op, opName, asmName>,
1757 VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
// Full GFX10 set for a VOP2e instruction. The e32 and e64 forms come from the
// plain VOP2_Real_* multiclasses (which take only `op`), while the SDWA, DPP
// and DPP8 forms reuse the VOP2be_* multiclasses with `opName`/`asmName`.
1759 multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
1760 VOP2_Real_e32_gfx10<op>,
1761 VOP2_Real_e64_gfx10<op>,
1762 VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
1763 VOP2be_Real_dpp_gfx10<op, opName, asmName>,
1764 VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
// Full GFX10 set for an ordinary VOP2 instruction: e32, e64, SDWA, DPP and
// DPP8 reals, all sharing the 6-bit opcode `op`.
1766 multiclass VOP2_Real_gfx10<bits<6> op> :
1767 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
1768 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
// Instantiates the GFX10 set and the GFX11 "FULL" set with the same `op`.
1770 multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
1771 VOP2_Real_gfx10<op>, VOP2_Real_FULL_gfx11<op>;
// GFX10 set (e32, e64, SDWA, DPP, DPP8) built from the *_with_name
// multiclasses, each of which receives `op`, `opName` and `asmName`.
1773 multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
1775 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
1776 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
1777 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
1778 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
1779 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
// Instantiates the named GFX10 set and the named GFX11 "FULL" set with the
// same `op`, `opName` and `asmName`.
1781 multiclass VOP2_Real_with_name_gfx10_gfx11<bits<6> op, string opName,
1783 VOP2_Real_with_name_gfx10<op, opName, asmName>,
1784 VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;
// GFX10 VOP2 real definitions (plus GFX11 where a *_gfx10_gfx11 multiclass is
// used). The hex argument is the `op` value passed to the multiclass.
1786 // NB: Same opcode as v_mac_legacy_f32
// V_FMAC_LEGACY_F32 is placed in its own decoder namespace, "GFX10_B".
1787 let DecoderNamespace = "GFX10_B" in
1788 defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
1790 defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>;
1791 defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>;
1792 defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>;
1793 defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>;
1794 defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
1795 defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
1796 defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
1797 defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
1798 defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
1799 defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
1800 defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
1801 defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
1802 defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
1803 defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
// V_PK_FMAC_F16 gets only the e32 real here and is marked IsSingle.
1805 let IsSingle = 1 in {
1806 defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
// GFX10 add/sub family. Each entry names the existing pseudo (second
// argument) and the GFX10 assembly mnemonic (third argument).
1809 // VOP2 no carry-in, carry-out.
1811 VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">;
1813 VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">;
1814 defm V_SUBREV_NC_U32 :
1815 VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
1817 // VOP2 carry-in, carry-out.
1818 defm V_ADD_CO_CI_U32 :
1819 VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
1820 defm V_SUB_CO_CI_U32 :
1821 VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
1822 defm V_SUBREV_CO_CI_U32 :
1823 VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
// V_CNDMASK_B32 uses the VOP2e combination, keeping its own name and mnemonic.
1825 defm V_CNDMASK_B32 :
1826 VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
// Instructions that get only a VOP3 real on GFX10; the argument is the 10-bit
// VOP3 opcode.
1829 defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>;
1830 defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>;
1831 defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>;
1832 defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>;
1833 defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>;
1834 defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
1835 defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
1836 defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>;
1837 defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>;
// Carry-out add/sub use the VOP3be encoding multiclass instead.
1840 defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>;
1841 defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>;
1842 defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;
// Assembler aliases for the GFX10-only e32 reals of V_CNDMASK_B32 and the
// three carry-in/carry-out instructions. Each VOP2bInstAliases use pairs the
// e32 pseudo with its GFX10 real and the GFX10 mnemonic string.
1844 let SubtargetPredicate = isGFX10Only in {
1845 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;
1847 defm : VOP2bInstAliases<
1848 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
1849 defm : VOP2bInstAliases<
1850 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
1851 defm : VOP2bInstAliases<
1852 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
1853 } // End SubtargetPredicate = isGFX10Only
1855 //===----------------------------------------------------------------------===//
1856 // GFX6, GFX7, GFX10, GFX11
1857 //===----------------------------------------------------------------------===//
// Encoding fields for the DPP form of a VOP2 instruction. The profile `P`
// defaults to the profile of the DPP pseudo `ps`.
//   Inst{8-0}   fixed 0xfa
//   Inst{16-9}  low 8 bits of src1 when P.HasSrc1, else 0
//   Inst{24-17} low 8 bits of vdst when P.EmitDst, else 0
//   Inst{30-25} the 6-bit opcode `op`
//   Inst{31}    0
1859 class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
1863 let Inst{8-0} = 0xfa; //dpp
1864 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
1865 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
1866 let Inst{30-25} = op;
1867 let Inst{31} = 0x0; //encoding
// Building-block multiclasses for GFX6/GFX7 reals. Everything in this scope
// gets AssemblerPredicate = isGFX6GFX7 and DecoderNamespace = "GFX6GFX7", and
// uses SIEncodingFamily.SI.
1870 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
// Real for a pseudo named exactly NAME (no _e32 suffix), VOP2e-encoded.
1871 multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
1873 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
1874 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
// Real for a pseudo named exactly NAME, using the VOP2_MADKe encoding.
1876 multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
1878 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
1879 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
// 32-bit VOP2 real built from the opName#"_e32" pseudo.
1881 multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
1882 def _e32_gfx6_gfx7 :
1883 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>,
1884 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
// VOP3 real built from opName#"_e64"; the VOP3 opcode is {1,0,0,op}, i.e.
// 0x100 | op.
1886 multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
1887 def _e64_gfx6_gfx7 :
1888 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
1889 VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
// Same as the e64 multiclass above but using the VOP3be encoding class.
1891 multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
1892 def _e64_gfx6_gfx7 :
1893 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
1894 VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
1896 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
// Instantiates the GFX6/GFX7 and the GFX10 MADK reals with the same `op`.
1898 multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
1899 VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;
// GFX6/GFX7 pair of reals for a VOP2 instruction: the e32 and the e64 form.
1901 multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
1902 VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;
// For instructions whose opcode `op` is shared by GFX6/GFX7 and GFX10:
// instantiates both sets.
1904 multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
1905 VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;
// Extends the GFX6/GFX7/GFX10 set with the GFX11 "FULL" set, same `op`.
1907 multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
1908 VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL_gfx11<op>;
// GFX6/GFX7 pair for a VOP2be instruction: the plain e32 real plus the
// VOP3be-encoded e64 real.
1910 multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
1911 VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;
// GFX6/GFX7 VOP2be reals built from the pseudos of `opName` but printed with
// the mnemonic `asmName`. The asm string of each form is `asmName` followed
// by the AsmOperands of the corresponding _e32 / _e64 pseudo.
1913 multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
1914 string opName, string asmName> {
1915 defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
1916 defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
// defm "" adds no extra prefix beyond the outer defm's name.
1918 let AsmString = asmName # ps32.AsmOperands in {
1919 defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
1922 let AsmString = asmName # ps64.AsmOperands in {
1923 defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
// Instructions that receive only GFX6/GFX7 e32+e64 reals from this list; the
// argument is the 6-bit VOP2 opcode.
1927 defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>;
1928 defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>;
1929 defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>;
1930 defm V_LSHR_B32 : VOP2_Real_gfx6_gfx7<0x015>;
1931 defm V_ASHR_I32 : VOP2_Real_gfx6_gfx7<0x017>;
1932 defm V_LSHL_B32 : VOP2_Real_gfx6_gfx7<0x019>;
1933 defm V_BFM_B32 : VOP2_Real_gfx6_gfx7<0x01e>;
1934 defm V_BCNT_U32_B32 : VOP2_Real_gfx6_gfx7<0x022>;
1935 defm V_MBCNT_LO_U32_B32 : VOP2_Real_gfx6_gfx7<0x023>;
1936 defm V_MBCNT_HI_U32_B32 : VOP2_Real_gfx6_gfx7<0x024>;
1937 defm V_LDEXP_F32 : VOP2_Real_gfx6_gfx7<0x02b>;
1938 defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
1939 defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
1940 defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
1941 defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>;
1942 defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>;
1944 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
1945 // VI, but the VI instructions behave the same as the SI versions.
// GFX6/GFX7 reals that reuse the *_CO_U32 pseudos under the *_i32 mnemonics.
1946 defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
1947 defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
1948 defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
1949 defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>;
1950 defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>;
1951 defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;
// Lane access instructions use the Lane multiclass, which casts NAME itself.
1953 defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;
// V_WRITELANE_B32 overrides the input operand list, adding $vdst_in.
1955 let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
1956 defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
1957 } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
// Assembler aliases for GFX6/GFX7. Each entry pairs a pseudo (first argument)
// with the GFX6/GFX7 real (second argument), for both e32 and e64 forms of
// the add/sub instructions.
1959 let SubtargetPredicate = isGFX6GFX7 in {
1960 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
1961 defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
1962 defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
1963 defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;
1965 def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
1966 def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
1967 def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
1968 } // End SubtargetPredicate = isGFX6GFX7
// Instructions whose VOP2 opcode is the same on GFX6/GFX7 and GFX10 (and on
// GFX11 where the *_gfx11 multiclass is used). One defm creates all of the
// per-generation reals.
1970 defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
1971 defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
1972 defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
1973 defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
1974 defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
1975 defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
1976 defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
1977 defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
1978 defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
1979 defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
1980 defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
1981 defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
1982 defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>;
1983 defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
1984 defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
1985 defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
1986 defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
1987 defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
1988 defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
1989 defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
1990 defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
1991 defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
1992 defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
1993 defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
1994 defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
1995 defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;
1997 //===----------------------------------------------------------------------===//
1999 //===----------------------------------------------------------------------===//
// Building-block multiclasses for GFX8/GFX9 reals. Everything in this scope
// gets AssemblerPredicate = isGFX8GFX9 and, unless overridden,
// DecoderNamespace = "GFX8".
2001 let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
// MADK real for the pseudo named exactly NAME, VI encoding family.
2003 multiclass VOP2_Real_MADK_vi <bits<6> op> {
2004 def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
2005 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
// MADK real for the GFX940 encoding family; decoded in the "GFX9" namespace.
2008 multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
2009 def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
2010 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
2011 let DecoderNamespace = "GFX9";
// 32-bit VOP2 real built from the NAME#"_e32" pseudo.
2015 multiclass VOP2_Real_e32_vi <bits<6> op> {
2017 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
2018 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
// VOP3 real built from the NAME#"_e64" pseudo; takes the full 10-bit opcode.
2021 multiclass VOP2_Real_e64_vi <bits<10> op> {
2023 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2024 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
// VOP3 real from NAME#"_e64" for instructions given only an e64 real here.
2027 multiclass VOP2_Real_e64only_vi <bits<10> op> {
2029 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2030 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
// e32 + e64 pair; the VOP3 opcode is {0,1,0,0,op}, i.e. 0x100 | op.
2035 multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
2036 VOP2_Real_e32_vi<op>,
2037 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
2039 } // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
// SDWA real (VOP2_SDWAe encoding) built from the NAME#"_sdwa" pseudo; emitted
// only when the NAME#"_e32" profile sets HasExtSDWA.
2041 multiclass VOP2_SDWA_Real <bits<6> op> {
2042 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
2044 VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
2045 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
// SDWA9 real (VOP2_SDWA9Ae encoding) built from the NAME#"_sdwa" pseudo;
// emitted only when the NAME#"_e32" profile sets HasExtSDWA9.
2048 multiclass VOP2_SDWA9_Real <bits<6> op> {
2049 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
2051 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
2052 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
// GFX8-only reals for VOP2be instructions. The pseudos are looked up by
// `OpName`, and every form overrides its AsmString to `AsmName` followed by
// the AsmOperands of the pseudo it wraps.
2055 let AssemblerPredicate = isGFX8Only in {
2057 multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
// e32 form: VOP2e encoding, VI family, "GFX8" decoder namespace.
2059 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
2060 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
2061 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
2062 let AsmString = AsmName # ps.AsmOperands;
2063 let DecoderNamespace = "GFX8";
// e64 form: VOP3be encoding with VOP3 opcode {0,1,0,0,op} (0x100 | op).
2066 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
2067 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
2068 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
2069 let AsmString = AsmName # ps.AsmOperands;
2070 let DecoderNamespace = "GFX8";
// SDWA form, only when the e32 profile sets HasExtSDWA.
2072 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then
2074 VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
2075 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
2076 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
2077 let AsmString = AsmName # ps.AsmOperands;
// DPP form, only when the e32 profile sets HasExtDPP.
2079 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then
2081 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
2082 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
2083 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
2084 let AsmString = AsmName # ps.AsmOperands;
// GFX9-only reals, all using SIEncodingFamily.GFX9. The first multiclass
// renames the instruction (pseudos found via `OpName`, printed as `AsmName`);
// the second uses NAME for both and leaves the asm string untouched.
2089 let AssemblerPredicate = isGFX9Only in {
2091 multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
// e32 form: VOP2e encoding, "GFX9" decoder namespace.
2093 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
2094 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
2095 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
2096 let AsmString = AsmName # ps.AsmOperands;
2097 let DecoderNamespace = "GFX9";
// e64 form: VOP3be encoding with VOP3 opcode {0,1,0,0,op} (0x100 | op).
2100 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
2101 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
2102 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
2103 let AsmString = AsmName # ps.AsmOperands;
2104 let DecoderNamespace = "GFX9";
// SDWA9 form, only when the e32 profile sets HasExtSDWA9.
2106 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9 then
2108 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
2109 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
2110 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
2111 let AsmString = AsmName # ps.AsmOperands;
// DPP form, only when the e32 profile sets HasExtDPP; decoded in "SDWA9".
2113 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then
2115 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
2116 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
2117 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
2118 let AsmString = AsmName # ps.AsmOperands;
2119 let DecoderNamespace = "SDWA9";
// Same four forms for an instruction that keeps its own NAME and asm string.
2123 multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
2125 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
2126 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
2127 let DecoderNamespace = "GFX9";
2130 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
2131 VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
2132 let DecoderNamespace = "GFX9";
2134 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
2136 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
2137 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
2139 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
2141 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
2142 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
2143 let DecoderNamespace = "SDWA9";
2147 } // AssemblerPredicate = isGFX9Only
// Full VI set for a VOP2 instruction: the e32/e64 pair, the SDWA and SDWA9
// reals, plus a DPP real (VI family) when the NAME#"_e32" profile sets
// HasExtDPP.
2149 multiclass VOP2_Real_e32e64_vi <bits<6> op> :
2150 Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
2152 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
2154 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
2155 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
// VI real definitions for the 32-bit VOP2 instructions; the argument is the
// 6-bit VOP2 opcode.
2158 defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
2159 defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
2160 defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
2161 defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
// The predicate override below applies to V_MUL_LEGACY_F32 only.
2162 let AssemblerPredicate = isGCN3ExcludingGFX90A in
2163 defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
2164 defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
2165 defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
2166 defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
2167 defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
2168 defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
2169 defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
2170 defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
2171 defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
2172 defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
2173 defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
2174 defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
2175 defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
2176 defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
2177 defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
2178 defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
2179 defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
2180 defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
2181 defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
2182 defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
2183 defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
// Integer add/sub reals for GFX8 and GFX9. Opcodes 0x19-0x1e are defined
// twice from the same pseudos: once with the GFX8-only mnemonics
// (v_add_u32, v_addc_u32, ...) and once with the GFX9-only "_co" mnemonics.
2185 defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">;
2186 defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">;
2187 defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">;
2188 defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
2189 defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
2190 defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
2192 defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">;
2193 defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">;
2194 defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">;
2195 defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
2196 defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
2197 defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
// GFX9-only reals for the V_ADD_U32 / V_SUB_U32 / V_SUBREV_U32 pseudos at
// separate opcodes 0x34-0x36.
2199 defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
2200 defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
2201 defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
// Instructions that get only a VOP3 (e64) real on VI; the argument is the
// full 10-bit VOP3 opcode.
2203 defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
2204 defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
2205 defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
2206 defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>;
2207 defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>;
2208 defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
2209 defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
2210 defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
2211 defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>;
2212 defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>;
2213 defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>;
// VI real definitions for the F16 / U16 / I16 / B16 VOP2 instructions,
// occupying opcodes 0x1f-0x33.
2215 defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
2216 defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
2217 defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
2218 defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
2219 defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
2220 defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
2221 defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
2222 defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
2223 defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
2224 defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>;
2225 defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>;
2226 defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
2227 defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
2228 defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>;
2229 defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>;
2230 defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>;
2231 defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>;
2232 defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>;
2233 defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
2234 defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
2235 defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;
// GFX8/GFX9 assembler aliases.
2237 let SubtargetPredicate = isGFX8GFX9 in {
2239 // Aliases to simplify matching of floating-point instructions that
2240 // are VOP2 on SI and VOP3 on VI.
// Maps the three-operand text form "name $dst, $src0, $src1" onto the VOP3
// real `inst`, passing 0 for the remaining operands. One more trailing 0 is
// supplied when the instruction's profile has HasOMod set.
2241 class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
2242 name#" $dst, $src0, $src1",
2243 !if(inst.Pfl.HasOMod,
2244 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
2245 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
2246 >, PredicateControl {
2247 let UseInstAsmMatchConverter = 0;
2248 let AsmVariantName = AMDGPUAsmVariants.VOP3;
2251 def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
2252 def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
2253 def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
2254 def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
2255 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
2257 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;
2259 } // End SubtargetPredicate = isGFX8GFX9
// GFX9-only assembler aliases for the "_co" add/sub reals. Each entry pairs
// an e32 pseudo with its GFX9 real and the GFX9 mnemonic string.
2261 let SubtargetPredicate = isGFX9Only in {
2263 defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">;
2264 defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">;
2265 defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">;
2266 defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">;
2267 defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">;
2268 defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;
2270 } // End SubtargetPredicate = isGFX9Only
// VI-encoded reals that additionally require the HasDLInsts subtarget
// predicate.
2272 let SubtargetPredicate = HasDLInsts in {
2274 defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
2275 defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
2277 } // End SubtargetPredicate = HasDLInsts
// Building-block multiclasses for the GFX90A encoding family. Everything in
// this scope gets AssemblerPredicate = isGFX90APlus and, unless overridden,
// DecoderNamespace = "GFX90A".
2279 let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
// 32-bit VOP2 real built from the NAME#"_e32" pseudo.
2280 multiclass VOP2_Real_e32_gfx90a <bits<6> op> {
2282 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>,
2283 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
// VOP3 real built from NAME#"_e64"; reuses the VOP3e_vi encoding class.
2286 multiclass VOP2_Real_e64_gfx90a <bits<10> op> {
2288 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
2289 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
// e32 + e64 pair; the VOP3 opcode is {0,1,0,0,op}, i.e. 0x100 | op.
2292 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> :
2293 VOP2_Real_e32_gfx90a<op>,
2294 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>;
// The pair above plus a DPP real (decoded in "SDWA9") when the e32 profile
// sets HasExtDPP.
2296 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> :
2297 Base_VOP2_Real_e32e64_gfx90a<op> {
2299 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
2301 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
2302 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
2303 let DecoderNamespace = "SDWA9";
2306 } // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"
// Real definitions using the GFX90A / GFX940 multiclasses, each guarded by
// its own subtarget predicate.
2308 let SubtargetPredicate = HasFmacF64Inst in {
2309 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>;
2310 } // End SubtargetPredicate = HasFmacF64Inst
// On GFX90A+ V_MUL_LEGACY_F32 gets only an e64 real (10-bit opcode 0x2a1).
2312 let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in {
2313 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
2316 let SubtargetPredicate = HasFmaakFmamkF32Insts in {
2317 defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;
2318 defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>;
// Reals for the dot-accumulate instructions on the VI encoding: the e32/e64
// pair plus an unconditional `_dpp_vi` real built from the NAME#"_dpp"
// pseudo (no HasExtDPP check here).
2321 multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
2322 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
// GFX10 reals for the dot-accumulate instructions: e32, DPP and DPP8 forms
// only (no e64 or SDWA multiclass is included).
2325 multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
2326 VOP2_Real_e32_gfx10<op>,
2327 VOP2_Real_dpp_gfx10<op>,
2328 VOP2_Real_dpp8_gfx10<op>;
// Dot-accumulate and packed-FMAC reals, each group guarded by the subtarget
// predicate of the feature that provides the instruction.
2330 let SubtargetPredicate = HasDot5Insts in {
2331 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
2332 // NB: Opcode conflicts with V_DOT8C_I32_I4
2333 // This opcode exists in gfx 10.1* only
2334 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
2337 let SubtargetPredicate = HasDot6Insts in {
2338 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
2339 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
2342 let SubtargetPredicate = HasDot4Insts in {
2343 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
2345 let SubtargetPredicate = HasDot3Insts in {
2346 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>;
// V_PK_FMAC_F16 gets only the e32 real on the VI encoding.
2349 let SubtargetPredicate = HasPkFmacF16Inst in {
2350 defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
2351 } // End SubtargetPredicate = HasPkFmacF16Inst
2353 let SubtargetPredicate = HasDot3Insts in {
2354 // NB: Opcode conflicts with V_DOT2C_F32_F16
2355 let DecoderNamespace = "GFX10_B" in
2356 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;