//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}
def TruePredicate : Predicate<"true">;

// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
class PredicateControl {
  Predicate SubtargetPredicate = InvalidPred;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate],
                                           AssemblerPredicates,
                                           OtherPredicates);
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;
def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def FMA : Predicate<"Subtarget->hasFMA()">;
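
// Illustrative sketch (not part of this file): anything deriving from
// PredicateControl, such as AMDGPUPat below, can be gated on one of these
// predicates. The opcode name here is hypothetical.
//
//   let OtherPredicates = [NoFP32Denormals] in
//   def : AMDGPUPat<(fadd f32:$a, f32:$b), (HYPOTHETICAL_ADD_F32 $a, $b)>;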
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}
let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;
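
// Illustrative sketch (not part of this file): the *_oneuse fragments match
// the same node as the plain operator, but only when it has a single use, so
// folding it into a combined instruction does not duplicate the computation.
// The fused opcode name below is hypothetical.
//
//   def : AMDGPUPat<
//     (add i32:$z, (shl_oneuse i32:$x, i32:$y)),
//     (HYPOTHETICAL_SHL_ADD_U32 $x, $y, $z)
//   >;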
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;

def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;

  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;

  return false;
}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//
def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;
//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//
def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//
def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;
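
// Illustrative sketch (not part of this file): these leaves match the
// condition operand of a setcc/selectcc node, so a single target pattern can
// cover both the ordered and the plain form of a comparison. The compare
// opcode name below is hypothetical.
//
//   def : AMDGPUPat<
//     (i1 (setcc f32:$src0, f32:$src1, COND_OEQ)),
//     (HYPOTHETICAL_CMP_EQ_F32 $src0, $src1)
//   >;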
//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

class Aligned16Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() >= 16;
}]>;
class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;

class StoreFrag<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
>;
class PrivateAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;

class ConstantAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class LocalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

class GlobalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

class GlobalLoadAddress : CodePatPred<[{
  auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class FlatLoadAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS ||
         AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class FlatStoreAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                                (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;

class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;

class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;

class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;

class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
def load_private : PrivateLoad <load>;
def az_extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;
def az_extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def store_private : PrivateStore <store>;
def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;

def load_global : GlobalLoad <load>;
def sextloadi8_global : GlobalLoad <sextloadi8>;
def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi16_global : GlobalLoad <sextloadi16>;
def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def atomic_load_global : GlobalLoad<atomic_load>;

def store_global : GlobalStore <store>;
def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;
def store_atomic_global : GlobalStore<atomic_store>;
def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;

def load_local : LocalLoad <load>;
def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;
def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;
def atomic_load_32_local : LocalLoad<atomic_load_32>;
def atomic_load_64_local : LocalLoad<atomic_load_64>;

def store_local : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;
def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;
def load_align8_local : Aligned8Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def load_align16_local : Aligned16Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def store_align8_local : Aligned8Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def store_align16_local : Aligned16Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;
def load_flat : FlatLoad <load>;
def az_extloadi8_flat : FlatLoad <az_extloadi8>;
def sextloadi8_flat : FlatLoad <sextloadi8>;
def az_extloadi16_flat : FlatLoad <az_extloadi16>;
def sextloadi16_flat : FlatLoad <sextloadi16>;
def atomic_load_flat : FlatLoad<atomic_load>;

def store_flat : FlatStore <store>;
def truncstorei8_flat : FlatStore <truncstorei8>;
def truncstorei16_flat : FlatStore <truncstorei16>;
def atomic_store_flat : FlatStore <atomic_store>;
def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;

def constant_load : ConstantLoad<load>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;
def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;
def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
  (ops node:$ptr, node:$cmp, node:$swap),
  (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;

  def _noret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
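
// Illustrative sketch (not part of this file): each defm above produces three
// fragments, e.g. atomic_add_global, atomic_add_global_noret, and
// atomic_add_global_ret, so a target can pick a cheaper encoding when the
// atomic's result is unused. The opcode name below is hypothetical.
//
//   def : AMDGPUPat<
//     (atomic_add_global_noret i64:$ptr, i32:$val),
//     (HYPOTHETICAL_ATOMIC_ADD_NORET $ptr, $val)
//   >;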
def AMDGPUatomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global_noret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
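
// Illustrative sketch (not part of this file): the encodings above are meant
// to be used as immediate operands in output patterns, e.g. scaling by
// 1/(2*pi) before a hardware trig opcode. The opcode names below are
// hypothetical.
//
//   def : AMDGPUPat<
//     (fsin f32:$src),
//     (HYPOTHETICAL_SIN_F32
//       (HYPOTHETICAL_MUL_F32 (HYPOTHETICAL_MOV_IMM CONST.TWO_PI_INV), $src))
//   >;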
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
> {
  let SubtargetPredicate = TruePredicate;
}

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
> {
  let SubtargetPredicate = TruePredicate;
}
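
// Illustrative sketch (not part of this file): targets instantiate these once
// per vector type and lane, mapping each lane to a subregister index, e.g.:
//
//   def : Extract_Element <f32, v2f32, 0, sub0>;
//   def : Insert_Element <f32, v2f32, 1, sub1>;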
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;
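
// Illustrative sketch (not part of this file): a no-op bitcast between
// same-size types stays in the same register class, e.g.:
//
//   def : BitConvert <i32, f32, VGPR_32>;
//   def : BitConvert <v2i32, i64, VReg_64>;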
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns
multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : AMDGPUPat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : AMDGPUPat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  def : AMDGPUPat <
    (f32 (fcopysign f32:$src0, f64:$src1)),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
             (i32 (EXTRACT_SUBREG $src1, sub1)))
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}
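
// Illustrative usage sketch (not part of this file): a target instantiates
// the multiclass with its bitfield-insert opcode, a 32-bit immediate move,
// and its 64-bit register class, e.g.:
//
//   defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;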
// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
  def : AMDGPUPat <
    (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
    (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
  >;

  def : AMDGPUPat <
    (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
                    (i32 (EXTRACT_SUBREG $y, sub0))),
               (i32 (EXTRACT_SUBREG $z, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
                    (i32 (EXTRACT_SUBREG $y, sub1))),
               (i32 (EXTRACT_SUBREG $z, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
  >;
}
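
// Illustrative usage sketch (not part of this file), mirroring the BFI case:
//
//   defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;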
// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;
multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  def : AMDGPUPat <
    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
  >;

  // x & ((1 << y) - 1)
  def : AMDGPUPat <
    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & ~(-1 << y)
  def : AMDGPUPat <
    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & (-1 >> (bitwidth - y))
  def : AMDGPUPat <
    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x << (bitwidth - y) >> (bitwidth - y)
  def : AMDGPUPat <
    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  def : AMDGPUPat <
    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (SBFE $src, (MOV (i32 0)), $width)
  >;
}
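
// Illustrative usage sketch (not part of this file):
//
//   defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;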
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
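
// Illustrative usage sketch (not part of this file): rotate-right selects to
// a bit-align opcode with the source repeated, e.g.:
//
//   def : ROTRPattern <V_ALIGNBIT_B32>;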
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse,
                 ValueType vt = i32> : AMDGPUPat<
  (max (min_oneuse vt:$src0, vt:$src1),
       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
  (med3Inst $src0, $src1, $src2)
>;
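
// Illustrative usage sketch (not part of this file): instantiated once for
// the signed and once for the unsigned median-of-3 opcode, e.g.:
//
//   def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
//   def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;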
// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
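
// Illustrative usage sketch (not part of this file): HasClamp selects the
// instruction form that takes a trailing clamp operand, e.g.:
//
//   def : IMad24Pat<V_MAD_I32_I24, 1>;
//   def : UMad24Pat<V_MAD_U32_U24, 1>;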
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
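
// Illustrative usage sketch (not part of this file): 1.0/x and rcp(sqrt(x))
// select to the hardware reciprocal opcodes, e.g.:
//
//   def : RcpPat<V_RCP_F32_e32, f32>;
//   def : RsqPat<V_RSQ_F32_e32, f32>;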