//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// NOTE(review): this copy of the file appears to have lost text that sat
// between some '<' and '>' characters: the multiclasses below use MI, Load,
// Store, Value, ResType and ImmPred without declaring them, the `defm:` lines
// carry no template arguments, and HwLen2 / cast(N) are truncated. The
// definitions are reproduced exactly as found; restore the missing text from
// the authoritative source before building.

// Pattern leaves that pair a vector value type with an HVX register class:
// HQ* use HvxQR, HV* use HvxVR and HW* use HvxWR.
def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>;
def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>;
def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>;

def HVI8: PatLeaf<(VecI8 HvxVR:$R)>;
def HVI16: PatLeaf<(VecI16 HvxVR:$R)>;
def HVI32: PatLeaf<(VecI32 HvxVR:$R)>;
def HVF16: PatLeaf<(VecF16 HvxVR:$R)>;
def HVF32: PatLeaf<(VecF32 HvxVR:$R)>;

def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>;
def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>;

// Type profiles with one result. The unary profile has one vector operand;
// the binary profile has two operands constrained to the same vector type.
def SDTVecUnaryOp:
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDTVecBinOp:
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;

// VEXTRACTW: i32 result from a vector operand and an i32 operand.
def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
  [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;

// VINSERTW0: vector result of the same type as operand 1, plus an i32 operand.
def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;

// Going by the surviving text, this transform returns an i32 target constant
// equal to ST.getVectorLength()/2.
// NOTE(review): the start of this definition (operand kind, opening of the
// code fragment and the declaration of ST) is missing.
def HwLen2: SDNodeXFormgetSubtarget();
  return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
}]>;

// Expands to V6_vandqrt of $Qs with the scalar constant -1.
def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;

// Builds an HvxWR register sequence: the first operand ($Vs) becomes vsub_hi
// and the second ($Vt) becomes vsub_lo.
def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
  (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;

// Both operands go through Q2V and V6_vpackeb with a zero vector; the $Qs
// result is rotated by HwLen2, OR-ed with the $Qt result, and the combined
// vector is passed to V6_vandvrt with -1.
def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
  (V6_vandvrt
    (V6_vor
      (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
               (A2_tfrsi (HwLen2 (i32 0)))), // Half the vector length
      (V6_vpackeb (V6_vd0), (Q2V $Qt))),
    (A2_tfrsi -1))>;

// Extract the vsub_lo / vsub_hi subregister of the operand.
def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;

// Target-specific DAG nodes used by the patterns in this file.
// SDTVecLeaf is not defined in the visible part of the file.
def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>;
def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>;
def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;

// Matches a splat_vector of either integer 0 or f32zero.
def vzero: PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>;
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat: PatFrag<(ops node:$Qs, node:$Qt),
  (HexagonQCAT node:$Qs, node:$Qt)>;

// Matches the operand xor-ed with qtrue.
def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
def vunpack: PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;

// Output fragments wrapping the byte/halfword unpack instructions
// (V6_vunpackb/h and V6_vunpackub/uh).
def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;

// Matches an i32 immediate that is a multiple of the HvxVR spill size and
// whose value shifted right by log2 of that size fits in a signed 4-bit
// integer.
def IsVecOff : PatLeaf<(i32 imm), [{
  int32_t V = N->getSExtValue();
  int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
  assert(isPowerOf2_32(VecSize));
  if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
    return false;
  int32_t L = Log2_32(VecSize);
  return isInt<4>(V >> L);
}]>;

// Load/store fragments split on the result of isAlignedMemNode.
// NOTE(review): the type argument of cast(...) appears to be missing in each
// of the four fragments below.
def alignedload: PatFrag<(ops node:$a), (load $a), [{
  return isAlignedMemNode(cast(N));
}]>;

def unalignedload: PatFrag<(ops node:$a), (load $a), [{
  return !isAlignedMemNode(cast(N));
}]>;

def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return isAlignedMemNode(cast(N));
}]>;

def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return !isAlignedMemNode(cast(N));
}]>;

// HVX loads

// Loads addressed by a frame index: frame index plus offset (written with
// `add` or with IsOrAdd), and a bare frame index selected with offset 0.
multiclass HvxLdfi_pat {
  def: Pat<(ResType (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load AddrFI:$fi)),
           (ResType (MI AddrFI:$fi, 0))>;
}

// Loads addressed by a register: register plus offset, and a bare register
// selected with offset 0.
multiclass HvxLdgi_pat {
  def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$Off))),
           (MI I32:$Rt, imm:$Off)>;
  def: Pat<(ResType (Load I32:$Rt)),
           (MI I32:$Rt, 0)>;
}

// Loads from constant-pool addresses; the address is materialized with
// A2_tfrsi (HexagonCP) or C4_addipc (HexagonAtPcrel) and used with offset 0.
multiclass HvxLdc_pat {
  // The HVX selection code for shuffles can generate vector constants.
  // Calling "Select" on the resulting loads from CP fails without these
  // patterns.
  def: Pat<(ResType (Load (HexagonCP tconstpool:$Addr))),
           (MI (A2_tfrsi imm:$Addr), 0)>;
  def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$Addr))),
           (MI (C4_addipc imm:$Addr), 0)>;
}

// Union of the three load-pattern groups above.
multiclass HvxLd_pat {
  defm: HvxLdfi_pat;
  defm: HvxLdgi_pat;
  defm: HvxLdc_pat ;
}

// Aligned loads: everything, plus loads with valignaddr node.
multiclass HvxLda_pat {
  // The valignaddr forms carry extra complexity (50) compared with the plain
  // patterns pulled in through HvxLd_pat below.
  let AddedComplexity = 50 in {
    def: Pat<(ResType (Load (valignaddr I32:$Rt))),
             (MI I32:$Rt, 0)>;
    def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))),
             (MI I32:$Rt, imm:$Off)>;
  }
  defm: HvxLd_pat;
}

// NOTE(review): the template arguments of the defm lines in the next two
// groups are missing, so the instruction/fragment/type of each cannot be told
// from here.
let Predicates = [UseHVX] in {
  // alignedload will match a non-temporal load as well, so try non-temporal
  // first.
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLda_pat;

  defm: HvxLd_pat;
  defm: HvxLd_pat;
  defm: HvxLd_pat;
}

let Predicates = [UseHVXV68] in {
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLda_pat;
  defm: HvxLd_pat;
  defm: HvxLd_pat;
}

// HVX stores

// Stores addressed by a frame index: frame index plus offset (written with
// `add` or with IsOrAdd), and a bare frame index selected with offset 0.
multiclass HvxStfi_pat {
  def: Pat<(Store Value:$Vs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, AddrFI:$fi),
           (MI AddrFI:$fi, 0, Value:$Vs)>;
}

// Stores addressed by a register: register plus offset (written with `add` or
// with IsOrAdd), and a bare register selected with offset 0.
multiclass HvxStgi_pat {
  def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, I32:$Rt),
           (MI I32:$Rt, 0, Value:$Vs)>;
}

// Union of the two store-pattern groups above.
multiclass HvxSt_pat {
  defm: HvxStfi_pat;
  defm: HvxStgi_pat;
}

// NOTE(review): as with the loads, the template arguments of the defm lines
// in the next two groups are missing.
let Predicates = [UseHVX] in {
  // alignedstore will match a non-temporal store as well, so try non-temporal
  // first.
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
}

let Predicates = [UseHVXV68] in {
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
  defm: HvxSt_pat;
}

// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
let Predicates = [UseHVX] in { defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; } let Predicates = [UseHVX, UseHVXFloatingPoint] in { defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; defm: NopCast_pat; } let Predicates = [UseHVX] in { let AddedComplexity = 100 in { // These should be preferred over a vsplat of 0. def: Pat<(VecI8 vzero), (V6_vd0)>; def: Pat<(VecI16 vzero), (V6_vd0)>; def: Pat<(VecI32 vzero), (V6_vd0)>; def: Pat<(VecPI8 vzero), (PS_vdd0)>; def: Pat<(VecPI16 vzero), (PS_vdd0)>; def: Pat<(VecPI32 vzero), (PS_vdd0)>; def: Pat<(VecPF32 vzero), (PS_vdd0)>; def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>; def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>; def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>; } def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>; def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>; def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), (V6_extractw HvxVR:$Vu, I32:$Rs)>; def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), (V6_extractw HvxVR:$Vu, I32:$Rs)>; def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), (V6_extractw HvxVR:$Vu, I32:$Rs)>; def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; } let Predicates = [UseHVX, UseHVXFloatingPoint] 
in { let AddedComplexity = 100 in { def: Pat<(VecF16 vzero), (V6_vd0)>; def: Pat<(VecF32 vzero), (V6_vd0)>; def: Pat<(VecPF16 vzero), (PS_vdd0)>; def: Pat<(VecPF32 vzero), (PS_vdd0)>; def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>; def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>; } def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; } // Splats for HvxV60 def V60splatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>; def V60splatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>; def V60splatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>; def V60splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>; def V60splatrh: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>; def V60splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>; // Splats for HvxV62+ def V62splatib: OutPatFrag<(ops node:$V), (V6_lvsplatb (ToI32 $V))>; def V62splatih: OutPatFrag<(ops node:$V), (V6_lvsplath (ToI32 $V))>; def V62splatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>; def V62splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatb $Rs)>; def V62splatrh: OutPatFrag<(ops node:$Rs), (V6_lvsplath $Rs)>; def V62splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>; def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>; let Predicates = [UseHVX,UseHVXV60] in { let AddedComplexity = 10 in { def: Pat<(VecI8 (splat_vector u8_0ImmPred:$V)), (V60splatib $V)>; def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (V60splatih $V)>; def: Pat<(VecI32 (splat_vector anyimm:$V)), (V60splatiw $V)>; def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$V)), (Rep (V60splatib $V))>; def: Pat<(VecPI16 (splat_vector 
u16_0ImmPred:$V)), (Rep (V60splatih $V))>; def: Pat<(VecPI32 (splat_vector anyimm:$V)), (Rep (V60splatiw $V))>; } def: Pat<(VecI8 (splat_vector I32:$Rs)), (V60splatrb $Rs)>; def: Pat<(VecI16 (splat_vector I32:$Rs)), (V60splatrh $Rs)>; def: Pat<(VecI32 (splat_vector I32:$Rs)), (V60splatrw $Rs)>; def: Pat<(VecPI8 (splat_vector I32:$Rs)), (Rep (V60splatrb $Rs))>; def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V60splatrh $Rs))>; def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V60splatrw $Rs))>; } let Predicates = [UseHVX,UseHVXV62] in { let AddedComplexity = 30 in { def: Pat<(VecI8 (splat_vector u8_0ImmPred:$V)), (V62splatib imm:$V)>; def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>; def: Pat<(VecI32 (splat_vector anyimm:$V)), (V62splatiw imm:$V)>; def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$V)), (Rep (V62splatib imm:$V))>; def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)), (Rep (V62splatih imm:$V))>; def: Pat<(VecPI32 (splat_vector anyimm:$V)), (Rep (V62splatiw imm:$V))>; } let AddedComplexity = 20 in { def: Pat<(VecI8 (splat_vector I32:$Rs)), (V62splatrb $Rs)>; def: Pat<(VecI16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>; def: Pat<(VecI32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>; def: Pat<(VecPI8 (splat_vector I32:$Rs)), (Rep (V62splatrb $Rs))>; def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V62splatrh $Rs))>; def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>; } } let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { let AddedComplexity = 30 in { def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>; def: Pat<(VecF32 (splat_vector anyint:$V)), (V62splatiw imm:$V)>; def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (V62splatiw (ftoi $V))>; } let AddedComplexity = 20 in { def: Pat<(VecF16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>; def: Pat<(VecF32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>; def: Pat<(VecF32 (splat_vector F32:$Rs)), (V62splatrw $Rs)>; } } class Vneg1 : PatFrag<(ops), (VecTy (splat_vector (i32 
-1)))>; class Vnot : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1)>; let Predicates = [UseHVX] in { let AddedComplexity = 200 in { def: Pat<(Vnot HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; def: Pat<(Vnot HVI16:$Vs), (V6_vnot HvxVR:$Vs)>; def: Pat<(Vnot HVI32:$Vs), (V6_vnot HvxVR:$Vs)>; } def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } // For now, we always deal with vector floating point in SF mode. 
class OpR_RR_pat_conv : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>; class OpR_RR_pat_conv_hf : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>; let Predicates = [UseHVXV68, UseHVXQFloat] in { def: OpR_RR_pat_conv_hf, VecF16, HVF16>; def: OpR_RR_pat_conv_hf, VecF16, HVF16>; def: OpR_RR_pat_conv_hf, VecF16, HVF16>; def: OpR_RR_pat_conv, VecF32, HVF32>; def: OpR_RR_pat_conv, VecF32, HVF32>; def: OpR_RR_pat_conv, VecF32, HVF32>; // For now we assume that the fp32 register is always coming in as IEEE float // since the qfloat arithmetic instructions above always generate the // accompanying conversions as part of their pattern def: Pat<(VecF16 (pf1 HWF32:$Vuu)), (V6_vdealh (V6_vconv_hf_qf32 (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)), (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0)) ))))>; // fpextend for QFloat is handled manually in HexagonISelLoweringHVX.cpp. } // HVX IEEE arithmetic Instructions let Predicates = [UseHVXV68, UseHVXIEEEFP] in { def: Pat<(fadd HVF16:$Rs, HVF16:$Rt), (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>; def: Pat<(fadd HVF32:$Rs, HVF32:$Rt), (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>; def: Pat<(fsub HVF16:$Rs, HVF16:$Rt), (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>; def: Pat<(fsub HVF32:$Rs, HVF32:$Rt), (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>; def: Pat<(fmul HVF16:$Rs, HVF16:$Rt), (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>; def: Pat<(fmul HVF32:$Rs, HVF32:$Rt), (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>; def: Pat<(VecF16 (pf1 HWF32:$Vuu)), (V6_vdealh (V6_vcvt_hf_sf (HiVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)))>; def: Pat<(VecPF32 (pf1 HVF16:$Vu)), (V6_vcvt_sf_hf (V6_vshuffh HvxVR:$Vu))>; def: OpR_R_pat; def: OpR_R_pat; def: OpR_R_pat; def: OpR_R_pat; def: Pat<(VecI8 (Fptosi HWF16:$Vu)), (V6_vcvt_b_hf (HiVec $Vu), (LoVec $Vu))>; def: Pat<(VecI8 (Fptoui HWF16:$Vu)), (V6_vcvt_ub_hf (HiVec $Vu), (LoVec $Vu))>; def: Pat<(VecPF16 (Sitofp HVI8:$Vu)), (V6_vcvt_hf_b 
HvxVR:$Vu)>; def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>; } let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in { let AddedComplexity = 220 in { defm: MinMax_pats; defm: MinMax_pats; defm: MinMax_pats; defm: MinMax_pats; } def: OpR_RR_pat, VecF16, HVF16>; def: OpR_RR_pat, VecF16, HVF16>; def: OpR_RR_pat, VecF32, HVF32>; def: OpR_RR_pat, VecF32, HVF32>; } let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in { let AddedComplexity = 220 in { defm: MinMax_pats; defm: MinMax_pats; defm: MinMax_pats; defm: MinMax_pats; } def: OpR_RR_pat, VecF16, HVF16>; def: OpR_RR_pat, VecF16, HVF16>; def: OpR_RR_pat, VecF32, HVF32>; def: OpR_RR_pat, VecF32, HVF32>; } let Predicates = [UseHVX] in { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). 
def: Pat<(mul HVI8:$Vs, HVI8:$Vt), (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)), (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>; def: Pat<(mul HVI16:$Vs, HVI16:$Vt), (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(mul HVI32:$Vs, HVI32:$Vt), (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt), HvxVR:$Vs, HvxVR:$Vt)>; } let Predicates = [UseHVX] in { def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>; def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>; def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>; def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>; def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>; def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>; def: Pat<(VecI32 (sext_invec HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>; def: Pat<(VecPI16 (sext_invec HWI8:$Vss)), (VSxtb (LoVec $Vss))>; def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>; def: Pat<(VecPI32 (sext_invec HWI8:$Vss)), (VSxth (LoVec (VSxtb (LoVec $Vss))))>; def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>; def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>; def: Pat<(VecI32 (zext_invec HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>; def: Pat<(VecPI16 (zext_invec HWI8:$Vss)), (VZxtb (LoVec $Vss))>; def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>; def: Pat<(VecPI32 (zext_invec HWI8:$Vss)), (VZxth (LoVec (VZxtb (LoVec $Vss))))>; def: Pat<(VecI8 (trunc HWI16:$Vss)), (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>; def: Pat<(VecI16 (trunc HWI32:$Vss)), (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>; def: Pat<(VecQ8 (trunc HVI8:$Vs)), (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>; def: Pat<(VecQ16 (trunc HVI16:$Vs)), (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>; def: Pat<(VecQ32 (trunc HVI32:$Vs)), (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>; } let Predicates = [UseHVX] in { // The "source" types are not legal, and there are no parameterized // definitions for them, but they are length-specific. 
let Predicates = [UseHVX,UseHVX64B] in { def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)), (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>; def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)), (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>; def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)), (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>; } let Predicates = [UseHVX,UseHVX128B] in { def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)), (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>; def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)), (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>; def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)), (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>; } // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt & HwLen). def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)), (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>; def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)), (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>; def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)), (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>; def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt), (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt), (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>; def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt), (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt), (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>; def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt), (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt), (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>; def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>; def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>; def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>; def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>; def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>; def: 
Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>; def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)), (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>; def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)), (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>; def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; // Vpackl is a pseudo-op that is used when legalizing widened truncates. // It should never be produced with a register pair in the output, but // it can happen to have a pair as an input. def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>; def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>; def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>; def: Pat<(VecI8 (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>; def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>; def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>; def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>; def: Pat<(VecI32 (vunpack HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>; def: Pat<(VecI32 (vunpack HVI16:$Vs)), (LoVec (VSxth $Vs))>; def: Pat<(VecPI16 (vunpack HVI8:$Vs)), (VSxtb $Vs)>; def: Pat<(VecPI32 (vunpack HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>; def: Pat<(VecPI32 (vunpack HVI32:$Vs)), (VSxth $Vs)>; def: Pat<(VecI16 (vunpacku HVI8:$Vs)), (LoVec (VZxtb $Vs))>; def: Pat<(VecI32 (vunpacku HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>; def: Pat<(VecI32 (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>; def: Pat<(VecPI16 (vunpacku HVI8:$Vs)), (VZxtb $Vs)>; def: 
Pat<(VecPI32 (vunpacku HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>; def: Pat<(VecPI32 (vunpacku HVI32:$Vs)), (VZxth $Vs)>; let Predicates = [UseHVX,UseHVXV60] in { def: Pat<(VecI16 (bswap HVI16:$Vs)), (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>; def: Pat<(VecI32 (bswap HVI32:$Vs)), (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>; } let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in { def: Pat<(VecI16 (bswap HVI16:$Vs)), (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>; def: Pat<(VecI32 (bswap HVI32:$Vs)), (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>; } def: Pat<(VecI8 (ctpop HVI8:$Vs)), (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))), (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>; def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>; def: Pat<(VecI32 (ctpop HVI32:$Vs)), (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))), (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>; let Predicates = [UseHVX,UseHVXV60] in def: Pat<(VecI8 (ctlz HVI8:$Vs)), (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))), (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))), (V60splatib (i32 0x08)))>; let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in def: Pat<(VecI8 (ctlz HVI8:$Vs)), (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))), (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))), (V62splatib (i32 0x08)))>; def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>; def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>; } class HvxSel_pat : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; let Predicates = [UseHVX] in { def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; } def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>; let Predicates = [UseHVX] in { def: Pat<(select I1:$Pu, VecQ8:$Qs, VecQ8:$Qt), (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; def: Pat<(select I1:$Pu, VecQ16:$Qs, VecQ16:$Qt), (V2Q (PS_vselect $Pu, 
(Q2V $Qs), (Q2V $Qt)))>; def: Pat<(select I1:$Pu, VecQ32:$Qs, VecQ32:$Qt), (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; } let Predicates = [UseHVX] in { def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>; def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>; def: Pat<(VecQ32 (qtrue)), (PS_qtrue)>; def: Pat<(VecQ8 (qfalse)), (PS_qfalse)>; def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>; def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>; def: Pat<(vnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>; def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>; def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>; def: Pat<(qnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>; def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>; def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat, VecQ8, HQ8>; def: OpR_RR_pat, VecQ16, HQ16>; def: OpR_RR_pat, VecQ32, HQ32>; def: OpR_RR_pat, VecQ8, HQ8>; def: OpR_RR_pat, VecQ16, HQ16>; def: OpR_RR_pat, VecQ32, HQ32>; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; } let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: 
AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: AccRRR_pat; def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)), (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>; def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)), (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>; }