//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

def FP16Denormals : Predicate<"Subtarget.hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;

def trunc_oneuse : HasOneUseUnaryOp<trunc>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;
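
// Illustrative sketch only (HYPOTHETICAL_AND_OR is a made-up instruction):
// the *_oneuse fragments match only when the inner value has a single user,
// so a folding pattern such as
//
//   def : Pat <
//     (or (and_oneuse i32:$x, i32:$y), i32:$z),
//     (HYPOTHETICAL_AND_OR $x, $y, $z)
//   >;
//
// will not fire when the AND result is also used elsewhere and would have
// to be recomputed.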

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;
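
// Illustrative sketch only (HYPOTHETICAL_CNDGE is a made-up instruction):
// the COND_* leaves match the condition operand of setcc/selectcc nodes,
// e.g.
//
//   def : Pat <
//     (selectcc f32:$lhs, f32:$rhs, f32:$t, f32:$f, COND_OGE),
//     (HYPOTHETICAL_CNDGE $lhs, $rhs, $t, $f)
//   >;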

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;

class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class PrivateStore <SDPatternOperator op> : PrivateMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def load_private : PrivateLoad <load>;

def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;

class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

// Global address space loads
class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def global_load : GlobalLoad <load>;

// Global address space stores
class GlobalStore <SDPatternOperator op> : GlobalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def global_store : GlobalStore <store>;
def global_store_atomic : GlobalStore<atomic_store>;

class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

// Constant address space loads
class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def constant_load : ConstantLoad<load>;

class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

// Local address space loads
class LocalLoad <SDPatternOperator op> : LocalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class LocalStore <SDPatternOperator op> : LocalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;
}]>;

class FlatLoad <SDPatternOperator op> : FlatMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                                (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi8_global : GlobalLoad <sextloadi8>;

def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;

def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;

def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def sextloadi16_global : GlobalLoad <sextloadi16>;

def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;

def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;

def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : GlobalLoad <az_extloadi32>;

def az_extloadi32_flat : FlatLoad <az_extloadi32>;

def az_extloadi32_constant : ConstantLoad <az_extloadi32>;

def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;

def local_store : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;

def local_load : LocalLoad <load>;

class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

def local_load_aligned8bytes : Aligned8Bytes <
  (ops node:$ptr), (local_load node:$ptr)
>;

def local_store_aligned8bytes : Aligned8Bytes <
  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
>;

class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {

  def _32_local : PatFrag <
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i32 &&
             AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
  }]>;

  def _64_local : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i64 &&
             AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
  }]>;
}

defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;

multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;

  def _noret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}

defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
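
// Illustrative sketch only (the opcodes are made up): the _ret/_noret
// variants let selection pick a cheaper encoding when the atomic's result
// is unused, e.g.
//
//   def : Pat <(atomic_add_global_noret i64:$ptr, i32:$val),
//              (HYPOTHETICAL_ATOMIC_ADD $ptr, $val)>;
//   def : Pat <(atomic_add_global_ret i64:$ptr, i32:$val),
//              (HYPOTHETICAL_ATOMIC_ADD_RTN $ptr, $val)>;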

def AMDGPUatomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_cmp_swap node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;

def atomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;

def atomic_cmp_swap_global_noret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
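
// Sanity check on the encodings above: 0x3f800000 is sign 0, biased
// exponent 0x7f (2^0) and a zero mantissa, i.e. 1.0f; setting the sign bit
// gives 0xbf800000 = -1.0f. Likewise 0x3C00 is 1.0 in IEEE half precision,
// and 0x3C003C00 packs that value into both halves of a v2f16.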

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1 in {

class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (AMDGPUclamp f32:$src0))]
>;

class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // usesCustomInserter = 1

multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                    ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
}
}

} // End isCodeGenOnly = 1, isPseudo = 1
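
// Illustrative use only (the register class and address operand names are
// placeholders for whatever a subtarget defines):
//
//   defm HYPOTHETICAL_ : RegisterLoadStore <SomeReg32, SomeAddrOperand,
//                                           ADDRIndirect>;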

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
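
// This relies on the identity x^y = 2^(y * log2 x), so fpow lowers to an
// IEEE log, a multiply and an IEEE exp; e.g. 8^0.5 becomes
// exp2(0.5 * log2 8) = exp2(1.5) ~= 2.83 = sqrt(8).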

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;
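
// Illustrative instantiations (sub0/sub1 stand for whatever subregister
// indices the target defines for the low and high elements):
//
//   def : Extract_Element <f32, v2f32, 0, sub0>;
//   def : Insert_Element <f32, v2f32, 1, sub1>;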

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  //     (y & x) | (z & ~x)
  def : Pat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : Pat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;
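
  // Check: when a bit of x is 1, z ^ (x & (y ^ z)) reduces to z ^ (y ^ z) = y,
  // and when it is 0 it reduces to z; that is the "choose" function x ? y : z,
  // which is exactly what BFI_INT computes with x as the mask.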

  def : Pat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  def : Pat <
    (f32 (fcopysign f32:$src0, f64:$src1)),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
             (i32 (EXTRACT_SUBREG $src1, sub1)))
  >;

  def : Pat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  def : Pat <
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;
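
// Check: per the first BFIPatterns pattern, BFI_INT(mask, a, b) computes
// (mask & a) | (~mask & b). With mask = x ^ y this selects z wherever x and
// y disagree and y (== x) wherever they agree, which is the majority
// function written above.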

// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;

multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  def : Pat <
    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
  >;

  def : Pat <
    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (UBFE $src, (i32 0), $width)
  >;

  def : Pat <
    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (SBFE $src, (i32 0), $width)
  >;
}
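
// Worked example for the first pattern: (x >> 8) & 0xff becomes UBFE with
// offset 8 and width popcount(0xff) = 8, i.e. an extract of bits [15:8];
// IMMPopCount converts the contiguous mask into that width operand at
// selection time.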

class ROTRPattern <Instruction BIT_ALIGN> : Pat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
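
// BIT_ALIGN is expected to select 32 contiguous bits of the 64-bit
// concatenation of its first two operands, shifted right by the third;
// with both halves equal to $src0 that is precisely a rotate right by
// $src1.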

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse,
                 ValueType vt = i32> : Pat<
  (max (min_oneuse vt:$src0, vt:$src1),
       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
  (med3Inst $src0, $src1, $src2)
>;
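
// Illustrative instantiation only (the med3 opcode is a placeholder for a
// target's signed 32-bit median-of-3 instruction):
//
//   def : IntMed3Pat <HYPOTHETICAL_MED3_I32, smax, smax_oneuse, smin_oneuse>;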

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
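
// Both fragments are guarded by NoNaNsFPMath: they match floor (plus an add
// of 0.5 for the round-half-up case, e.g. floor(2.3 + 0.5) = 2 and
// floor(2.5 + 0.5) = 3) feeding a conversion, so a target can map the whole
// expression to one convert instruction when NaN inputs can be ignored.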

class IMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class UMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
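
// Illustrative instantiations only (the opcode names are placeholders for a
// target's IEEE reciprocal and reciprocal-sqrt instructions):
//
//   def : RcpPat <HYPOTHETICAL_RECIP_IEEE, f32>;
//   def : RsqPat <HYPOTHETICAL_RECIPSQRT_IEEE, f32>;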

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"