//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
9 // This file was originally auto-generated from a GPU register header file and
10 // all the instruction definitions were originally commented out. Instructions
11 // that are not yet supported remain commented out.
12 //===----------------------------------------------------------------------===//
// Subtarget predicates used to gate instruction availability and assembler
// matching for the definitions below.

// Any GCN generation (Southern Islands or newer).
def isGCN : Predicate<"Subtarget->getGeneration() "
                      ">= SISubtarget::SOUTHERN_ISLANDS">,
            AssemblerPredicate<"FeatureGCN">;
// Exactly the Southern Islands generation.
def isSI : Predicate<"Subtarget->getGeneration() "
                     "== SISubtarget::SOUTHERN_ISLANDS">,
           AssemblerPredicate<"FeatureSouthernIslands">;

// LDS bank count selects between the V_INTERP_P1_F32 variants defined below.
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
                       AssemblerPredicate<"FeatureVGPRIndexMode">;
def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
                AssemblerPredicate<"FeatureMovrel">;
28 include "VOPInstructions.td"
29 include "SOPInstructions.td"
30 include "SMInstructions.td"
31 include "FLATInstructions.td"
32 include "BUFInstructions.td"
let SubtargetPredicate = isGCN in {

//===----------------------------------------------------------------------===//
// EXP Instructions
//===----------------------------------------------------------------------===//

// Export instructions; EXP_DONE marks the final export of the program.
defm EXP : EXP_m<0, AMDGPUexport>;
defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;

//===----------------------------------------------------------------------===//
// VINTRP Instructions
//===----------------------------------------------------------------------===//

// All interpolation instructions implicitly read M0 and EXEC.
let Uses = [M0, EXEC] in {

// FIXME: Specify SchedRW for VINTRP instructions.

// NOTE(review): the opcode/(outs ...) lines and the pattern tails of the
// VINTRP definitions below appear to be missing from this copy of the
// file -- verify against upstream before relying on them.
multiclass V_INTERP_P1_F32_m : VINTRP_m <
  (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
  "v_interp_p1_f32 $vdst, $vsrc, $attr$attrchan",
  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),

// Plain form for 32-bank LDS subtargets.
let OtherPredicates = [has32BankLDS] in {

defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;

} // End OtherPredicates = [has32BankLDS]

// 16-bank LDS subtargets require that the destination does not alias the
// source (@earlyclobber); this variant is only used by the asm parser.
let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $vdst", isAsmParserOnly=1 in {

defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;

} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $vdst", isAsmParserOnly=1

// Second interpolation step accumulates into $src0, which is tied to the
// destination and carries no encoding bits.
let DisableEncoding = "$src0", Constraints = "$src0 = $vdst" in {

defm V_INTERP_P2_F32 : VINTRP_m <
  (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
  "v_interp_p2_f32 $vdst, $vsrc, $attr$attrchan",
  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),

} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"

defm V_INTERP_MOV_F32 : VINTRP_m <
  (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
  "v_interp_mov_f32 $vdst, $vsrc, $attr$attrchan",
  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),

} // End Uses = [M0, EXEC]
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//

// NOTE(review): several closing braces / property lines in this section
// appear to be missing from this copy of the file -- verify against upstream.

// Pseudo for atomic_fence; the ordering and synchronization scope are
// carried as immediates.
def ATOMIC_FENCE : SPseudoInstSI<
  (outs), (ins i32imm:$ordering, i32imm:$scope),
  [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))],
  "ATOMIC_FENCE $ordering, $scope"> {
  let hasSideEffects = 1;

let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {

// For use in patterns
// 64-bit conditional select; expanded by a custom inserter.
def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
  (ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> {
  let isCodeGenOnly = 1;
  let usesCustomInserter = 1;

// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
                                      (ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]

// Materializes the static LDS size of the current workgroup.
let usesCustomInserter = 1, SALU = 1 in {
def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
  [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
} // End let usesCustomInserter = 1, SALU = 1

// Terminator variants of SALU move/logic ops (note isTerminator = 1).
def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
  (ins SSrc_b64:$src0)> {
  let isAsCheapAsAMove = 1;
  let isTerminator = 1;

def S_XOR_B64_term : PseudoInstSI<(outs SReg_64:$dst),
  (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
  let isAsCheapAsAMove = 1;
  let isTerminator = 1;

def S_ANDN2_B64_term : PseudoInstSI<(outs SReg_64:$dst),
  (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
  let isAsCheapAsAMove = 1;
  let isTerminator = 1;

// Lowering of int_amdgcn_wave_barrier; modeled with side effects and no
// scheduling info.
def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
  [(int_amdgcn_wave_barrier)]> {
  let hasNoSchedulingInfo = 1;
  let hasSideEffects = 1;
  let isConvergent = 1;
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.

// NOTE(review): some closing braces / property lines in this section appear
// to be missing from this copy of the file -- verify against upstream.

// Dummy terminator instruction to use after control flow instructions
// replaced with exec mask operations.
def SI_MASK_BRANCH : VPseudoInstSI <
  (outs), (ins brtarget:$target)> {
  let isTerminator = 1;
  let hasNoSchedulingInfo = 1;

let isTerminator = 1 in {

// Branch on a divergent condition held in an SGPR pair.
def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
  (ins SReg_64:$vcc, brtarget:$target),
  [(brcond i1:$vcc, bb:$target)]> {

// Structured control-flow pseudos produced by the structurizer (see above).
def SI_IF: CFPseudoInstSI <
  (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
  [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
  let Constraints = "";
  let hasSideEffects = 1;

def SI_ELSE : CFPseudoInstSI <
  (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 1;

def SI_LOOP : CFPseudoInstSI <
  (outs), (ins SReg_64:$saved, brtarget:$target),
  [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
  let hasSideEffects = 1;

} // End isTerminator = 1

// Ends a structured control-flow region using the mask in $saved.
def SI_END_CF : CFPseudoInstSI <
  (outs), (ins SReg_64:$saved),
  [(int_amdgcn_end_cf i64:$saved)], 1, 1> {
  let isAsCheapAsAMove = 1;
  let isReMaterializable = 1;
  let hasSideEffects = 1;
  let mayLoad = 1; // FIXME: Should not need memory flags

def SI_BREAK : CFPseudoInstSI <
  (outs SReg_64:$dst), (ins SReg_64:$src),
  [(set i64:$dst, (int_amdgcn_break i64:$src))], 1> {
  let isAsCheapAsAMove = 1;
  let isReMaterializable = 1;

def SI_IF_BREAK : CFPseudoInstSI <
  (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
  [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
  let isAsCheapAsAMove = 1;
  let isReMaterializable = 1;

def SI_ELSE_BREAK : CFPseudoInstSI <
  (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
  [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]> {
  let isAsCheapAsAMove = 1;
  let isReMaterializable = 1;
// Lane-kill pseudos; note Defs = [EXEC, VCC].
let Uses = [EXEC], Defs = [EXEC,VCC] in {
def SI_KILL : PseudoInstSI <
  (outs), (ins VSrc_b32:$src),
  [(AMDGPUkill i32:$src)]> {
  let isConvergent = 1;
  let usesCustomInserter = 1;

def SI_KILL_TERMINATOR : SPseudoInstSI <
  (outs), (ins VSrc_b32:$src)> {
  let isTerminator = 1;

// Emits only an assembler comment; used to surface illegal copies.
def SI_ILLEGAL_COPY : SPseudoInstSI <
  (outs unknown:$dst), (ins unknown:$src),
  [], " ; illegal copy $src to $dst">;

} // End Uses = [EXEC], Defs = [EXEC,VCC]

// Branch on undef scc. Used to avoid intermediate copy from
// IMPLICIT_DEF to SCC.
def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
  let isTerminator = 1;
  let usesCustomInserter = 1;

// Wraps int_amdgcn_ps_live (live-lane mask query for pixel shaders).
def SI_PS_LIVE : PseudoInstSI <
  (outs SReg_64:$dst), (ins),
  [(set i1:$dst, (int_amdgcn_ps_live))]> {

def SI_MASKED_UNREACHABLE : SPseudoInstSI <(outs), (ins),
  [(int_amdgcn_unreachable)],
  "; divergent unreachable"> {
  let hasNoSchedulingInfo = 1;

// Used as an isel pseudo to directly emit initialization with an
// s_mov_b32 rather than a copy of another initialized
// register. MachineCSE skips copies, and we don't want to have to
// fold operands before it runs.
def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
  let usesCustomInserter = 1;
  let isAsCheapAsAMove = 1;
  let isReMaterializable = 1;

// Initialize EXEC from an immediate; expanded by a custom inserter.
def SI_INIT_EXEC : SPseudoInstSI <
  (outs), (ins i64imm:$src), []> {
  let usesCustomInserter = 1;
  let isAsCheapAsAMove = 1;

// Initialize EXEC from an SGPR input plus a shift amount.
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
  (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
  let usesCustomInserter = 1;

// Return for returning shaders to a shader variant epilog.
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
  (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
  let isTerminator = 1;
  let hasNoSchedulingInfo = 1;
// Variable-index vector element access pseudos; expanded by a custom
// inserter. Note Defs = [M0, EXEC].
let Defs = [M0, EXEC],
    UseNamedOperandTable = 1 in {

// Read one 32-bit element of $src at $idx (+ constant $offset).
class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
  (outs VGPR_32:$vdst),
  (ins rc:$src, VS_32:$idx, i32imm:$offset)> {
  let usesCustomInserter = 1;

// Write $val into the element of $src at $idx (+ constant $offset); the
// vector is updated in place ($src tied to $vdst).
class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
  (ins rc:$src, VS_32:$idx, i32imm:$offset, VGPR_32:$val)> {
  let Constraints = "$src = $vdst";
  let usesCustomInserter = 1;

// TODO: We can support indirect SGPR access.
def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;

def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;

} // End Defs = [M0, EXEC], UseNamedOperandTable = 1
// SGPR spill pseudos: _SAVE stores an SGPR tuple to a spill slot, _RESTORE
// reloads it.
// NOTE(review): the bodies/closing braces of the _SAVE/_RESTORE defs appear
// to be truncated in this copy of the file.
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
  let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
    def _SAVE : PseudoInstSI <
      (ins sgpr_class:$data, i32imm:$addr)> {

    def _RESTORE : PseudoInstSI <
      (outs sgpr_class:$data),
      (ins i32imm:$addr)> {

  } // End UseNamedOperandTable = 1

// You cannot use M0 as the output of v_readlane_b32 instructions or
// use it in the sdata operand of SMEM instructions. We still need to
// be able to spill the physical register m0, so allow it for
// SI_SPILL_32_* instructions.
defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;

// VGPR spill pseudos; scheduled as VMEM writes/reads.
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
  let UseNamedOperandTable = 1, VGPRSpill = 1,
      SchedRW = [WriteVMEM] in {
    def _SAVE : VPseudoInstSI <
      (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
           SReg_32:$soffset, i32imm:$offset)> {
      // (2 * 4) + (8 * num_subregs) bytes maximum
      let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);

    def _RESTORE : VPseudoInstSI <
      (outs vgpr_class:$vdata),
      (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
      // (2 * 4) + (8 * num_subregs) bytes maximum
      let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
  } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]

defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
// Computes a PC-relative 64-bit address for a global address pair.
// NOTE(review): the (outs ...) line and pattern head of this def appear to
// be missing from this copy of the file.
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
  (ins si_ga:$ptr_lo, si_ga:$ptr_hi),
    (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr_lo), (tglobaladdr:$ptr_hi))))]> {

} // End SubtargetPredicate = isGCN

let Predicates = [isGCN] in {

// NOTE(review): the "def : Pat <" headers and closing ">;" of the patterns
// below appear to be missing from this copy of the file.

// Lower exec-initialization nodes to the pseudos defined above.
  (AMDGPUinit_exec i64:$src),
  (SI_INIT_EXEC (as_i64imm $src))

  (AMDGPUinit_exec_from_input i32:$input, i32:$shift),
  (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))

  (AMDGPUtrap timm:$trapid),

  (AMDGPUelse i64:$src, bb:$target),
  (SI_ELSE $src, $target, 0)

  // 0xbf800000 is -1.0f; kill with an always-negative operand.
  (SI_KILL (i32 0xbf800000))
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// These contractions are only applied under unsafe FP math.
let Predicates = [UnsafeFPMath] in {

//def : RcpPat<V_RCP_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F32_e32, f32>;

def : RsqPat<V_RSQ_F32_e32, f32>;
def : RsqPat<V_RSQ_F64_e32, f64>;

// NOTE(review): the "def : Pat <" headers of several patterns in this
// section appear to be missing from this copy of the file.

// Convert (x - floor(x)) to fract(x)
  (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
             (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
  (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)

// Convert (x + (-floor(x))) to fract(x)
  (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
             (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
  (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)

} // End Predicates = [UnsafeFPMath]

// f16_to_fp patterns
// The abs/neg of an f16 value expressed as bit ops on its i32 container
// are folded into the conversion's source modifiers.
  (f32 (f16_to_fp i32:$src0)),
  (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)

  (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
  (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)

  (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
  (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)

  (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
  (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)

// f16 -> f64 goes through an intermediate f32 conversion.
  (f64 (fpextend f16:$src)),
  (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))

// fp_to_fp16 patterns
  (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
  (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE)

// f16 <-> integer conversions go through f32.
  (i32 (fp_to_sint f16:$src)),
  (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src))

  (i32 (fp_to_uint f16:$src)),
  (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src))

  (f16 (sint_to_fp i32:$src)),
  (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src))

  (f16 (uint_to_fp i32:$src)),
  (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src))
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// fmad with no source modifiers selects to the MAC form.
multiclass FMADPat <ValueType vt, Instruction inst> {
  (vt (fmad (VOP3NoMods vt:$src0),
            (VOP3NoMods vt:$src1),
            (VOP3NoMods vt:$src2))),
  (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
        SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)

defm : FMADPat <f16, V_MAC_F16_e64>;
defm : FMADPat <f32, V_MAC_F32_e64>;

// fmad with source modifiers needs the full MAD form.
class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat<
  (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod),
                (VOP3Mods f32:$src1, i32:$src1_mod),
                (VOP3Mods f32:$src2, i32:$src2_mod))),
  (inst $src0_mod, $src0, $src1_mod, $src1,
        $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)

def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>;

// select on i1 -> v_cndmask; note the operand order (false, true, cond).
multiclass SelectPat <ValueType vt, Instruction inst> {
  (vt (select i1:$src0, vt:$src1, vt:$src2)),
  (inst $src2, $src1, $src0)

defm : SelectPat <i16, V_CNDMASK_B32_e64>;
defm : SelectPat <i32, V_CNDMASK_B32_e64>;
defm : SelectPat <f16, V_CNDMASK_B32_e64>;
defm : SelectPat <f32, V_CNDMASK_B32_e64>;

// Fold an add of a population count into v_bcnt_u32_b32.
  (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
  (V_BCNT_U32_B32_e64 $popcnt, $val)
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/

// Map constant-index extractelement/insertelement on each vector width to
// the matching subregister index (sub0, sub1, ...).
// NOTE(review): the closing ">;" lines of these defs appear to be missing
// from this copy of the file.
foreach Index = 0-2 in {
  def Extract_Element_v2i32_#Index : Extract_Element <
    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v2i32_#Index : Insert_Element <
    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)

  def Extract_Element_v2f32_#Index : Extract_Element <
    f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v2f32_#Index : Insert_Element <
    f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)

foreach Index = 0-3 in {
  def Extract_Element_v4i32_#Index : Extract_Element <
    i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v4i32_#Index : Insert_Element <
    i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)

  def Extract_Element_v4f32_#Index : Extract_Element <
    f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v4f32_#Index : Insert_Element <
    f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)

foreach Index = 0-7 in {
  def Extract_Element_v8i32_#Index : Extract_Element <
    i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v8i32_#Index : Insert_Element <
    i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)

  def Extract_Element_v8f32_#Index : Extract_Element <
    f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v8f32_#Index : Insert_Element <
    f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)

foreach Index = 0-15 in {
  def Extract_Element_v16i32_#Index : Extract_Element <
    i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v16i32_#Index : Insert_Element <
    i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)

  def Extract_Element_v16f32_#Index : Extract_Element <
    f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
  def Insert_Element_v16f32_#Index : Insert_Element <
    f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
// FIXME: Why do only some of these type combinations for SReg and
// VGPRs?

// 16-bit bitcasts
def : BitConvert <i16, f16, VGPR_32>;
def : BitConvert <f16, i16, VGPR_32>;
def : BitConvert <i16, f16, SReg_32>;
def : BitConvert <f16, i16, SReg_32>;

// 32-bit bitcasts
def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, VGPR_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <v2i16, i32, SReg_32>;
def : BitConvert <i32, v2i16, SReg_32>;
def : BitConvert <v2f16, i32, SReg_32>;
def : BitConvert <i32, v2f16, SReg_32>;
def : BitConvert <v2i16, v2f16, SReg_32>;
def : BitConvert <v2f16, v2i16, SReg_32>;
def : BitConvert <v2f16, f32, SReg_32>;
def : BitConvert <f32, v2f16, SReg_32>;
def : BitConvert <v2i16, f32, SReg_32>;
def : BitConvert <f32, v2i16, SReg_32>;

// 64-bit bitcasts
def : BitConvert <i64, f64, VReg_64>;
def : BitConvert <f64, i64, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2f32, VReg_64>;
def : BitConvert <v2f32, i64, VReg_64>;
def : BitConvert <f64, v2f32, VReg_64>;
def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
def : BitConvert <v4i32, v4f32, VReg_128>;
def : BitConvert <v4f32, v4i32, VReg_128>;

// 128-bit bitcasts
def : BitConvert <v2i64, v4i32, SReg_128>;
def : BitConvert <v4i32, v2i64, SReg_128>;
def : BitConvert <v2f64, v4f32, VReg_128>;
def : BitConvert <v2f64, v4i32, VReg_128>;
def : BitConvert <v4f32, v2f64, VReg_128>;
def : BitConvert <v4i32, v2f64, VReg_128>;
def : BitConvert <v2i64, v2f64, VReg_128>;
def : BitConvert <v2f64, v2i64, VReg_128>;

// 256-bit bitcasts
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, VReg_256>;
def : BitConvert <v8f32, v8i32, VReg_256>;

// 512-bit bitcasts
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
/********** =================== **********/
/********** Src & Dst modifiers **********/
/********** =================== **********/

// Clamp is implemented as v_max of the value with itself plus the clamp
// destination modifier.
// If denormals are not enabled, it only impacts the compare of the
// inputs. The output result is not flushed.
class ClampPat<Instruction inst, ValueType vt> : Pat <
  (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))),
  (inst i32:$src0_modifiers, vt:$src0,
        i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE)

def : ClampPat<V_MAX_F32_e64, f32>;
def : ClampPat<V_MAX_F64, f64>;
def : ClampPat<V_MAX_F16_e64, f16>;

/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/

// Prevent expanding both fneg and fabs.

// NOTE(review): several "def : Pat <" headers and closing lines in this
// section appear to be missing from this copy of the file.

  (fneg (fabs f32:$src)),
  (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit

// FIXME: Should use S_OR_B32
  (fneg (fabs f64:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
    (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
                  (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.

  // fabs f32: clear the sign bit.
  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff)))

  // fneg f32: flip the sign bit.
  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))

  (REG_SEQUENCE VReg_64,
    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
    (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
                   (V_MOV_B32_e32 (i32 0x7fffffff))), // Clear sign bit.

  (REG_SEQUENCE VReg_64,
    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
    (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
                   (i32 (V_MOV_B32_e32 (i32 0x80000000)))),

// fcopysign: merge magnitude bits of $src0 with the sign bit of $src1 via
// v_bfi; mixed-width forms shift the f16 sign bit into place first.
  (fcopysign f16:$src0, f16:$src1),
  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)

  (fcopysign f32:$src0, f16:$src1),
  (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0,
             (V_LSHLREV_B32_e64 (i32 16), $src1))

  (fcopysign f64:$src0, f16:$src1),
  (REG_SEQUENCE SReg_64,
    (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
    (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
               (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1)

  (fcopysign f16:$src0, f32:$src1),
  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
             (V_LSHRREV_B32_e64 (i32 16), $src1))

  (fcopysign f16:$src0, f64:$src1),
  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
             (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))

  // fneg f16: flip the f16 sign bit.
  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))

  // fabs f16: clear the f16 sign bit.
  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff)))

  (fneg (fabs f16:$src)),
  (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit

  // fneg v2f16: flip both packed sign bits.
  (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src)

  // fabs v2f16: clear both packed sign bits.
  (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src)

// This is really (fneg (fabs v2f16:$src))
//
// fabs is not reported as free because there is modifier for it in
// VOP3P instructions, so it is turned into the bit op.
  (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))),
  (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/

// NOTE(review): the "def : Pat <" headers of several patterns below appear
// to be missing from this copy of the file.

// Immediates that must live in a VGPR are materialized with v_mov_b32.
  (VGPRImm<(i32 imm)>:$imm),
  (V_MOV_B32_e32 imm:$imm)

  (VGPRImm<(f32 fpimm)>:$imm),
  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))

// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
  (VGPRImm<(f16 fpimm)>:$imm),
  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))

// SGPR-resident fp immediates are bitcast and moved with s_mov_b32.
  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))

  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))

  (i32 frameindex:$fi),
  (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))

  (i64 InlineImm<i64>:$imm),
  (S_MOV_B64 InlineImm<i64>:$imm)

// XXX - Should this use a s_cmp to set SCC?
// Set to sign-extended 64-bit value (true = -1, false = 0)
  (S_MOV_B64 (i64 (as_i64imm $imm)))

  (f64 InlineFPImm<f64>:$imm),
  (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
/********** ================== **********/
/********** Intrinsic Patterns **********/
/********** ================== **********/

// pow implemented as exp2(y * log2(x)) using the legacy multiply.
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;

// sext i1 -> i32: select -1/0 with v_cndmask.
  (i32 (sext i1:$src0)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)

// zext/anyext i1 -> i32: select 1/0.
class Ext32Pat <SDNode ext> : Pat <
  (i32 (ext i1:$src0)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)

def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;

// The multiplication scales from [0,1] to the unsigned integer range
// NOTE(review): the pattern head/result wrapper of this urecip expansion
// appears to be truncated in this copy of the file.
  (AMDGPUurecip i32:$src0),
  (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1),
                 (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
//===----------------------------------------------------------------------===//
// VOP3 Patterns
//===----------------------------------------------------------------------===//

def : IMad24Pat<V_MAD_I32_I24>;
def : UMad24Pat<V_MAD_U32_U24>;

defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;

// (i32)(i64 >> n), n in [0,31], selects directly to v_alignbit_b32 on the
// two halves of the source.
def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;

def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;

/********** ====================== **********/
/********** Indirect addressing **********/
/********** ====================== **********/

// Select variable-index extract/insert (with a constant offset folded via
// MOVRELOffset) to the SI_INDIRECT_* pseudos defined above.
multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
  // Extract with offset
  (eltvt (extractelt vt:$src, (MOVRELOffset i32:$idx, (i32 imm:$offset)))),
  (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $src, $idx, imm:$offset)

  // Insert with offset
  (insertelt vt:$src, eltvt:$val, (MOVRELOffset i32:$idx, (i32 imm:$offset))),
  (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $src, $idx, imm:$offset, $val)

defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;

defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// Sum-of-absolute-differences: |a - b| + c -> v_sad_u32.
// NOTE(review): the pattern headers and some operand lines below appear to
// be missing from this copy of the file.
  (add (sub_oneuse (umax i32:$src0, i32:$src1),
                   (umin i32:$src0, i32:$src1)),
  (V_SAD_U32 $src0, $src1, $src2)

  (add (select_oneuse (i1 (setugt i32:$src0, i32:$src1)),
                      (sub i32:$src0, i32:$src1),
                      (sub i32:$src1, i32:$src0)),
  (V_SAD_U32 $src0, $src1, $src2)

//===----------------------------------------------------------------------===//
// Conversion Patterns
//===----------------------------------------------------------------------===//

// sext_inreg selects to a bitfield extract; the immediate encodes
// offset | (width << 16).
def : Pat<(i32 (sext_inreg i32:$src, i1)),
          (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16

// Handle sext_inreg in i64
  (i64 (sext_inreg i64:$src, i1)),
  (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16

  (i16 (sext_inreg i16:$src, i1)),
  (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16

  (i16 (sext_inreg i16:$src, i8)),
  (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16

  (i64 (sext_inreg i64:$src, i8)),
  (S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16

  (i64 (sext_inreg i64:$src, i16)),
  (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16

  (i64 (sext_inreg i64:$src, i32)),
  (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16

// i32 -> i64 zext: zero in the high half.
  (i64 (zext i32:$src)),
  (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)

// i32 -> i64 anyext: the high half is undefined.
  (i64 (anyext i32:$src)),
  (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)

// i1 -> i64 zero/any extension: select 1/0 in the low half, zero the high.
class ZExt_i64_i1_Pat <SDNode ext> : Pat <
  (i64 (ext i1:$src)),
  (REG_SEQUENCE VReg_64,
                (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
                (S_MOV_B32 (i32 0)), sub1)

def : ZExt_i64_i1_Pat<zext>;
def : ZExt_i64_i1_Pat<anyext>;
// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
// REG_SEQUENCE patterns don't support instructions with multiple outputs.
// i32 -> i64 sext: replicate the sign bit into the high half.
  (i64 (sext i32:$src)),
  (REG_SEQUENCE SReg_64, $src, sub0,
    (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1)

// i1 -> i64 sext: -1/0 in both halves.
  (i64 (sext i1:$src)),
  (REG_SEQUENCE VReg_64,
    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)

// fp -> i1: compare the (modified) source for equality with +/-1.0
// (KOne), chosen per signedness of the conversion.
class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat <
  (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
  (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))

def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;

// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
// comparisons still write to a pair of SGPRs, so treat these as
// 64-bit comparisons. When legalizing SGPR copies, instructions
// resulting in the copies from SCC to these instructions will be
// moved to the VALU.
  (i1 (and i1:$src0, i1:$src1)),
  (S_AND_B64 $src0, $src1)

  (i1 (or i1:$src0, i1:$src1)),
  (S_OR_B64 $src0, $src1)

  (i1 (xor i1:$src0, i1:$src1)),
  (S_XOR_B64 $src0, $src1)

// i1 -> f32: select the fp constant directly.
  (f32 (sint_to_fp i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)

  (f32 (uint_to_fp i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)

// i1 -> f64: select an integer then convert.
  (f64 (sint_to_fp i1:$src)),
  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))

  (f64 (uint_to_fp i1:$src)),
  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//

// NOTE(review): several pattern headers / result lines below appear to be
// missing from this copy of the file.

  (i32 (AMDGPUfp16_zext f16:$src)),

// i64 -> i32 truncation is just the low subregister.
  (i32 (trunc i64:$a)),
  (EXTRACT_SUBREG $a, sub0)

// trunc to i1: mask the low bit and compare against 1.
  (i1 (trunc i32:$a)),
  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))

  (i1 (trunc i16:$a)),
  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))

  (i1 (trunc i64:$a)),
  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
                    (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))

// bswap via two 8-/24-bit rotates merged bytewise with v_bfi.
  (i32 (bswap i32:$a)),
  (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
             (V_ALIGNBIT_B32 $a, $a, (i32 24)),
             (V_ALIGNBIT_B32 $a, $a, (i32 8)))

// ((1 << a) - 1) << b -> s_bfm (bitfield mask).
multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
  (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),

  (vt (add (vt (shl 1, vt:$a)), -1)),
  (BFM $a, (MOV (i32 0)))

defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;

// fcanonicalize -> multiply by 1.0, preserving the source modifiers.
  (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0)

  (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
  (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)

  (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
  (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0)

  (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
  (V_PK_MUL_F16 SRCMODS.OP_SEL_1, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
1186 // Allow integer inputs
// Maps the AMDGPUexport SDNode onto an EXP-family instruction. Note the
// operand reorder between pattern and instruction: the node carries
// (tgt, en, srcs..., compr, vm) while the instruction encodes
// (tgt, srcs..., vm, compr, en).
1187 class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : Pat<
1188 (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
1189 (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
// Plain export and "done" (final) export variants, i32 sources only.
1192 def : ExpPattern<AMDGPUexport, i32, EXP>;
1193 def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
// Packed-16-bit build_vector lowering onto the scalar S_PACK_* family.
// LL = low halves of both sources, LH = low of src0 / high of src1,
// HH = high halves of both. A "high half" is matched as a one-use
// (srl x, 16) so the shift can be absorbed into the pack instruction.
1196 (v2i16 (build_vector i16:$src0, i16:$src1)),
1197 (v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
1200 // With multiple uses of the shift, this will duplicate the shift and
1201 // increase register pressure.
1203 (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
1204 (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
1208 (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
1209 (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
1210 (v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
1213 // TODO: Should source modifiers be matched to v_pack_b32_f16?
// f16 pair packing reuses the integer LL pack — it is a pure bit move.
1215 (v2f16 (build_vector f16:$src0, f16:$src1)),
1216 (v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
// scalar_to_vector patterns are intentionally disabled (commented out).
1220 // (v2f16 (scalar_to_vector f16:$src0)),
1225 // (v2i16 (scalar_to_vector i16:$src0)),
1229 //===----------------------------------------------------------------------===//
1231 //===----------------------------------------------------------------------===//
1233 let Predicates = [isSI] in {
1235 // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
1236 // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
1237 // way to implement it is using V_FRACT_F64.
1238 // The workaround for the V_FRACT bug is:
1239 // fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
1241 // Convert floor(x) to (x - fract(x))
// NOTE(review): several interior lines of this pattern (the subtract and the
// min wrapper) are not visible in this chunk; the fragments below select the
// NaN-aware fract: CNDMASK picks between min(V_FRACT(x), 0x3fefffffffffffff)
// and x itself, keyed on a V_CMP_CLASS NaN test (class mask 3 = sNaN|qNaN).
1243 (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
1248 (V_CNDMASK_B64_PSEUDO
1251 (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
// 0x3fefffffffffffff is the largest f64 strictly below 1.0, i.e. the
// 0.99999999999999999 clamp mentioned in the comment above.
1253 (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
1254 DSTCLAMP.NONE, DSTOMOD.NONE),
1256 (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))),
1257 DSTCLAMP.NONE, DSTOMOD.NONE)
1260 } // End Predicates = [isSI]
1262 //============================================================================//
1263 // Miscellaneous Optimization Patterns
1264 //============================================================================//
1266 // Undo sub x, c -> add x, -c canonicalization since c is more likely
1267 // an inline immediate than -c.
1268 // TODO: Also do for 64-bit.
// NegSubInlineConst32 matches an add of a negated inline-immediate constant;
// rewriting it back to S_SUB_I32 lets the positive constant encode inline.
1270 (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
1271 (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
// SHA-256 majority function Ma(x,y,z) folded into BFI + XOR.
1274 def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
// Integer median-of-3: i32 forms are available on all GCN targets.
1276 def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
1277 def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
1279 // This matches 16 permutations of
1280 // max(min(x, y), min(max(x, y), z))
// Floating-point median-of-3. The inner min/max uses are one-use-constrained
// so the whole tree collapses into a single med3 instruction, and all
// operands go through VOP3Mods_nnan (no-NaN source-modifier matching).
1281 class FPMed3Pat<ValueType vt,
1282 Instruction med3Inst> : Pat<
1283 (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
1284 (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
1285 (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
1286 (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
1287 (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
1288 (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
1291 def : FPMed3Pat<f32, V_MED3_F32>;
// 16-bit med3 variants only exist from GFX9 on.
1293 let Predicates = [isGFX9] in {
1294 def : FPMed3Pat<f16, V_MED3_F16>;
1295 def : IntMed3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
1296 def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
1297 } // End Predicates = [isGFX9]
1299 //============================================================================//
1300 // Assembler aliases
1301 //============================================================================//
// Accept the newer v_*_u32 mnemonics as spellings of the v_*_i32
// instructions so assembly written for either naming scheme parses.
1303 def : MnemonicAlias<"v_add_u32", "v_add_i32">;
1304 def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
1305 def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
// Closes `let SubtargetPredicate = isGCN in {` opened near the top of the file.
1307 } // End isGCN predicate