//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "HexagonISelLowering.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/Support/CommandLine.h" using namespace llvm; static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; void HexagonTargetLowering::initializeHVXLowering() { if (Subtarget.useHVX64BOps()) { addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); // These "short" boolean vector types should be legal because // they will appear as results of vector compares. If they were // not legal, type legalization would try to make them legal // and that would require using operations that do not use or // produce such types. That, in turn, would imply using custom // nodes, which would be unoptimizable by the DAG combiner. // The idea is to rely on target-independent operations as much // as possible. addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); } else if (Subtarget.useHVX128BOps()) { addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); } // Set up operation actions. bool Use64b = Subtarget.useHVX64BOps(); ArrayRef LegalV = Use64b ? LegalV64 : LegalV128; ArrayRef LegalW = Use64b ? LegalW64 : LegalW128; MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { setOperationAction(Opc, FromTy, Promote); AddPromotedToType(Opc, FromTy, ToTy); }; // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). // Note: v16i1 -> i16 is handled in type legalization instead of op // legalization. setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); setOperationAction(ISD::BITCAST, MVT::i128, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); for (MVT T : LegalV) { setIndexedLoadAction(ISD::POST_INC, T, Legal); setIndexedStoreAction(ISD::POST_INC, T, Legal); setOperationAction(ISD::AND, T, Legal); setOperationAction(ISD::OR, T, Legal); setOperationAction(ISD::XOR, T, Legal); setOperationAction(ISD::ADD, T, Legal); setOperationAction(ISD::SUB, T, Legal); setOperationAction(ISD::CTPOP, T, Legal); setOperationAction(ISD::CTLZ, T, Legal); if (T != ByteV) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::BSWAP, T, Legal); } setOperationAction(ISD::CTTZ, T, Custom); setOperationAction(ISD::LOAD, T, Custom); setOperationAction(ISD::MUL, T, Custom); setOperationAction(ISD::MULHS, T, Custom); setOperationAction(ISD::MULHU, T, Custom); setOperationAction(ISD::BUILD_VECTOR, T, Custom); // Make concat-vectors custom to handle concats of more than 2 vectors. setOperationAction(ISD::CONCAT_VECTORS, T, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); setOperationAction(ISD::ANY_EXTEND, T, Custom); setOperationAction(ISD::SIGN_EXTEND, T, Custom); setOperationAction(ISD::ZERO_EXTEND, T, Custom); if (T != ByteV) { setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); // HVX only has shifts of words and halfwords. setOperationAction(ISD::SRA, T, Custom); setOperationAction(ISD::SHL, T, Custom); setOperationAction(ISD::SRL, T, Custom); // Promote all shuffles to operate on vectors of bytes. setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); } setCondCodeAction(ISD::SETNE, T, Expand); setCondCodeAction(ISD::SETLE, T, Expand); setCondCodeAction(ISD::SETGE, T, Expand); setCondCodeAction(ISD::SETLT, T, Expand); setCondCodeAction(ISD::SETULE, T, Expand); setCondCodeAction(ISD::SETUGE, T, Expand); setCondCodeAction(ISD::SETULT, T, Expand); } for (MVT T : LegalW) { // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- // independent) handling of it would convert it to a load, which is // not always the optimal choice. setOperationAction(ISD::BUILD_VECTOR, T, Custom); // Make concat-vectors custom to handle concats of more than 2 vectors. setOperationAction(ISD::CONCAT_VECTORS, T, Custom); // Custom-lower these operations for pairs. Expand them into a concat // of the corresponding operations on individual vectors. setOperationAction(ISD::ANY_EXTEND, T, Custom); setOperationAction(ISD::SIGN_EXTEND, T, Custom); setOperationAction(ISD::ZERO_EXTEND, T, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom); setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::LOAD, T, Custom); setOperationAction(ISD::STORE, T, Custom); setOperationAction(ISD::CTLZ, T, Custom); setOperationAction(ISD::CTTZ, T, Custom); setOperationAction(ISD::CTPOP, T, Custom); setOperationAction(ISD::ADD, T, Legal); setOperationAction(ISD::SUB, T, Legal); setOperationAction(ISD::MUL, T, Custom); setOperationAction(ISD::MULHS, T, Custom); setOperationAction(ISD::MULHU, T, Custom); setOperationAction(ISD::AND, T, Custom); setOperationAction(ISD::OR, T, Custom); setOperationAction(ISD::XOR, T, Custom); setOperationAction(ISD::SETCC, T, Custom); setOperationAction(ISD::VSELECT, T, Custom); if (T != ByteW) { setOperationAction(ISD::SRA, T, Custom); setOperationAction(ISD::SHL, T, Custom); setOperationAction(ISD::SRL, T, Custom); // Promote all shuffles to operate on vectors of bytes. setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); } } // Boolean vectors. for (MVT T : LegalW) { // Boolean types for vector pairs will overlap with the boolean // types for single vectors, e.g. // v64i8 -> v64i1 (single) // v64i16 -> v64i1 (pair) // Set these actions first, and allow the single actions to overwrite // any duplicates. MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); setOperationAction(ISD::SETCC, BoolW, Custom); setOperationAction(ISD::AND, BoolW, Custom); setOperationAction(ISD::OR, BoolW, Custom); setOperationAction(ISD::XOR, BoolW, Custom); } for (MVT T : LegalV) { MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom); setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom); setOperationAction(ISD::AND, BoolV, Legal); setOperationAction(ISD::OR, BoolV, Legal); setOperationAction(ISD::XOR, BoolV, Legal); } if (Use64b) { for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal); } else { for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal); } setTargetDAGCombine(ISD::VSELECT); } SDValue HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, const SDLoc &dl, SelectionDAG &DAG) const { SmallVector IntOps; IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32)); for (const SDValue &Op : Ops) IntOps.push_back(Op); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps); } MVT HexagonTargetLowering::typeJoin(const TypePair &Tys) const { assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); MVT ElemTy = Tys.first.getVectorElementType(); return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() + Tys.second.getVectorNumElements()); } HexagonTargetLowering::TypePair HexagonTargetLowering::typeSplit(MVT VecTy) const { assert(VecTy.isVector()); unsigned NumElem = VecTy.getVectorNumElements(); assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2); return { HalfTy, HalfTy }; } MVT HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { MVT ElemTy = VecTy.getVectorElementType(); MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor); return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); } MVT HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { MVT ElemTy = VecTy.getVectorElementType(); MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor); return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); } SDValue HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const { if (ty(Vec).getVectorElementType() == ElemTy) return Vec; MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy); return DAG.getBitcast(CastTy, Vec); } SDValue HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, SelectionDAG &DAG) const { return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)), Ops.second, Ops.first); } HexagonTargetLowering::VectorPair HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const { TypePair Tys = typeSplit(ty(Vec)); if (Vec.getOpcode() == HexagonISD::QCAT) return VectorPair(Vec.getOperand(0), Vec.getOperand(1)); return DAG.SplitVector(Vec, dl, Tys.first, Tys.second); } bool HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { return Subtarget.isHVXVectorType(Ty) && Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); } bool HexagonTargetLowering::isHvxPairTy(MVT Ty) const { return Subtarget.isHVXVectorType(Ty) && Ty.getSizeInBits() == 16 * Subtarget.getVectorLength(); } bool HexagonTargetLowering::isHvxBoolTy(MVT Ty) const { return Subtarget.isHVXVectorType(Ty, true) && Ty.getVectorElementType() == MVT::i1; } bool HexagonTargetLowering::allowsHvxMemoryAccess( MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const { // Bool vectors are excluded by default, but make it explicit to // emphasize that bool vectors cannot be loaded or stored. // Also, disallow double vector stores (to prevent unnecessary // store widening in DAG combiner). if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength()) return false; if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false)) return false; if (Fast) *Fast = true; return true; } bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses( MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const { if (!Subtarget.isHVXVectorType(VecTy)) return false; // XXX Should this be false? vmemu are a bit slower than vmem. if (Fast) *Fast = true; return true; } SDValue HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, SelectionDAG &DAG) const { if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx); unsigned ElemWidth = ElemTy.getSizeInBits(); if (ElemWidth == 8) return ElemIdx; unsigned L = Log2_32(ElemWidth/8); const SDLoc &dl(ElemIdx); return DAG.getNode(ISD::SHL, dl, MVT::i32, {ElemIdx, DAG.getConstant(L, dl, MVT::i32)}); } SDValue HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const { unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); if (ElemWidth == 32) return Idx; if (ty(Idx) != MVT::i32) Idx = DAG.getBitcast(MVT::i32, Idx); const SDLoc &dl(Idx); SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32); SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask}); return SubIdx; } SDValue HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, ArrayRef Mask, SelectionDAG &DAG) const { MVT OpTy = ty(Op0); assert(OpTy == ty(Op1)); MVT ElemTy = OpTy.getVectorElementType(); if (ElemTy == MVT::i8) return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask); assert(ElemTy.getSizeInBits() >= 8); MVT ResTy = tyVector(OpTy, MVT::i8); unsigned ElemSize = ElemTy.getSizeInBits() / 8; SmallVector ByteMask; for (int M : Mask) { if (M < 0) { for (unsigned I = 0; I != ElemSize; ++I) ByteMask.push_back(-1); } else { int NewM = M*ElemSize; for (unsigned I = 0; I != ElemSize; ++I) ByteMask.push_back(NewM+I); } } assert(ResTy.getVectorNumElements() == ByteMask.size()); return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG), opCastElem(Op1, MVT::i8, DAG), ByteMask); } SDValue HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { unsigned VecLen = Values.size(); MachineFunction &MF = DAG.getMachineFunction(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); unsigned HwLen = Subtarget.getVectorLength(); unsigned ElemSize = ElemWidth / 8; assert(ElemSize*VecLen == HwLen); SmallVector Words; if (VecTy.getVectorElementType() != MVT::i32) { assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); for (unsigned i = 0; i != VecLen; i += OpsPerWord) { SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG); Words.push_back(DAG.getBitcast(MVT::i32, W)); } } else { Words.assign(Values.begin(), Values.end()); } unsigned NumWords = Words.size(); bool IsSplat = true, IsUndef = true; SDValue SplatV; for (unsigned i = 0; i != NumWords && IsSplat; ++i) { if (isUndef(Words[i])) continue; IsUndef = false; if (!SplatV.getNode()) SplatV = Words[i]; else if (SplatV != Words[i]) IsSplat = false; } if (IsUndef) return DAG.getUNDEF(VecTy); if (IsSplat) { assert(SplatV.getNode()); auto *IdxN = dyn_cast(SplatV.getNode()); if (IdxN && IdxN->isNullValue()) return getZero(dl, VecTy, DAG); return DAG.getNode(HexagonISD::VSPLATW, dl, VecTy, SplatV); } // Delay recognizing constant vectors until here, so that we can generate // a vsplat. SmallVector Consts(VecLen); bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); if (AllConst) { ArrayRef Tmp((Constant**)Consts.begin(), (Constant**)Consts.end()); Constant *CV = ConstantVector::get(Tmp); Align Alignment(HwLen); SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG); return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(MF), Alignment); } // A special case is a situation where the vector is built entirely from // elements extracted from another vector. This could be done via a shuffle // more efficiently, but typically, the size of the source vector will not // match the size of the vector being built (which precludes the use of a // shuffle directly). // This only handles a single source vector, and the vector being built // should be of a sub-vector type of the source vector type. auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec, SmallVectorImpl &SrcIdx) { SDValue Vec; for (SDValue V : Values) { if (isUndef(V)) { SrcIdx.push_back(-1); continue; } if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; // All extracts should come from the same vector. SDValue T = V.getOperand(0); if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()) return false; Vec = T; ConstantSDNode *C = dyn_cast(V.getOperand(1)); if (C == nullptr) return false; int I = C->getSExtValue(); assert(I >= 0 && "Negative element index"); SrcIdx.push_back(I); } SrcVec = Vec; return true; }; SmallVector ExtIdx; SDValue ExtVec; if (IsBuildFromExtracts(ExtVec, ExtIdx)) { MVT ExtTy = ty(ExtVec); unsigned ExtLen = ExtTy.getVectorNumElements(); if (ExtLen == VecLen || ExtLen == 2*VecLen) { // Construct a new shuffle mask that will produce a vector with the same // number of elements as the input vector, and such that the vector we // want will be the initial subvector of it. SmallVector Mask; BitVector Used(ExtLen); for (int M : ExtIdx) { Mask.push_back(M); if (M >= 0) Used.set(M); } // Fill the rest of the mask with the unused elements of ExtVec in hopes // that it will result in a permutation of ExtVec's elements. It's still // fine if it doesn't (e.g. if undefs are present, or elements are // repeated), but permutations can always be done efficiently via vdelta // and vrdelta. for (unsigned I = 0; I != ExtLen; ++I) { if (Mask.size() == ExtLen) break; if (!Used.test(I)) Mask.push_back(I); } SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec, DAG.getUNDEF(ExtTy), Mask); if (ExtLen == VecLen) return S; return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S); } } // Construct two halves in parallel, then or them together. assert(4*Words.size() == Subtarget.getVectorLength()); SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); SDValue S = DAG.getConstant(4, dl, MVT::i32); for (unsigned i = 0; i != NumWords/2; ++i) { SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {HalfV0, Words[i]}); SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {HalfV1, Words[i+NumWords/2]}); HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S}); HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S}); } HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1}); return DstV; } SDValue HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const { MVT PredTy = ty(PredV); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); if (Subtarget.isHVXVectorType(PredTy, true)) { // Move the vector predicate SubV to a vector register, and scale it // down to match the representation (bytes per type element) that VecV // uses. The scaling down will pick every 2nd or 4th (every Scale-th // in general) element and put them at the front of the resulting // vector. This subvector will then be inserted into the Q2V of VecV. // To avoid having an operation that generates an illegal type (short // vector), generate a full size vector. // SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV); SmallVector Mask(HwLen); // Scale = BitBytes(PredV) / Given BitBytes. unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes); unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes; for (unsigned i = 0; i != HwLen; ++i) { unsigned Num = i % Scale; unsigned Off = i / Scale; Mask[BlockLen*Num + Off] = i; } SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask); if (!ZeroFill) return S; // Fill the bytes beyond BlockLen with 0s. MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q); return DAG.getNode(ISD::AND, dl, ByteTy, S, M); } // Make sure that this is a valid scalar predicate. assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1); unsigned Bytes = 8 / PredTy.getVectorNumElements(); SmallVector Words[2]; unsigned IdxW = 0; auto Lo32 = [&DAG, &dl] (SDValue P) { return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P); }; auto Hi32 = [&DAG, &dl] (SDValue P) { return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P); }; SDValue W0 = isUndef(PredV) ? DAG.getUNDEF(MVT::i64) : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV); Words[IdxW].push_back(Hi32(W0)); Words[IdxW].push_back(Lo32(W0)); while (Bytes < BitBytes) { IdxW ^= 1; Words[IdxW].clear(); if (Bytes < 4) { for (const SDValue &W : Words[IdxW ^ 1]) { SDValue T = expandPredicate(W, dl, DAG); Words[IdxW].push_back(Hi32(T)); Words[IdxW].push_back(Lo32(T)); } } else { for (const SDValue &W : Words[IdxW ^ 1]) { Words[IdxW].push_back(W); Words[IdxW].push_back(W); } } Bytes *= 2; } assert(Bytes == BitBytes); SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy); SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32); for (const SDValue &W : Words[IdxW]) { Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4); Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W); } return Vec; } SDValue HexagonTargetLowering::buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { // Construct a vector V of bytes, such that a comparison V >u 0 would // produce the required vector predicate. unsigned VecLen = Values.size(); unsigned HwLen = Subtarget.getVectorLength(); assert(VecLen <= HwLen || VecLen == 8*HwLen); SmallVector Bytes; bool AllT = true, AllF = true; auto IsTrue = [] (SDValue V) { if (const auto *N = dyn_cast(V.getNode())) return !N->isNullValue(); return false; }; auto IsFalse = [] (SDValue V) { if (const auto *N = dyn_cast(V.getNode())) return N->isNullValue(); return false; }; if (VecLen <= HwLen) { // In the hardware, each bit of a vector predicate corresponds to a byte // of a vector register. Calculate how many bytes does a bit of VecTy // correspond to. assert(HwLen % VecLen == 0); unsigned BitBytes = HwLen / VecLen; for (SDValue V : Values) { AllT &= IsTrue(V); AllF &= IsFalse(V); SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) : DAG.getUNDEF(MVT::i8); for (unsigned B = 0; B != BitBytes; ++B) Bytes.push_back(Ext); } } else { // There are as many i1 values, as there are bits in a vector register. // Divide the values into groups of 8 and check that each group consists // of the same value (ignoring undefs). for (unsigned I = 0; I != VecLen; I += 8) { unsigned B = 0; // Find the first non-undef value in this group. for (; B != 8; ++B) { if (!Values[I+B].isUndef()) break; } SDValue F = Values[I+B]; AllT &= IsTrue(F); AllF &= IsFalse(F); SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) : DAG.getUNDEF(MVT::i8); Bytes.push_back(Ext); // Verify that the rest of values in the group are the same as the // first. for (; B != 8; ++B) assert(Values[I+B].isUndef() || Values[I+B] == F); } } if (AllT) return DAG.getNode(HexagonISD::QTRUE, dl, VecTy); if (AllF) return DAG.getNode(HexagonISD::QFALSE, dl, VecTy); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG); return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); } SDValue HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT ElemTy = ty(VecV).getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); (void)ElemWidth; SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {VecV, ByteIdx}); if (ElemTy == MVT::i32) return ExWord; // Have an extracted word, need to extract the smaller element out of it. // 1. Extract the bits of (the original) IdxV that correspond to the index // of the desired element in the 32-bit word. SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); // 2. Extract the element from the word. SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord); return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG); } SDValue HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { // Implement other return types if necessary. assert(ResTy == MVT::i1); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG); SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32); return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG); } SDValue HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { MVT ElemTy = ty(VecV).getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); (void)ElemWidth; auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, SDValue ByteIdxV) { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32, {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)}); SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV}); SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV}); SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32, {DAG.getConstant(HwLen, dl, MVT::i32), MaskV}); SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV}); return TorV; }; SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); if (ElemTy == MVT::i32) return InsertWord(VecV, ValV, ByteIdx); // If this is not inserting a 32-bit word, convert it into such a thing. // 1. Extract the existing word from the target vector. SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32, {ByteIdx, DAG.getConstant(2, dl, MVT::i32)}); SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx, dl, MVT::i32, DAG); // 2. Treating the extracted word as a 32-bit vector, insert the given // value into it. SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); MVT SubVecTy = tyVector(ty(Ext), ElemTy); SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext), ValV, SubIdx, dl, ElemTy, DAG); // 3. Insert the 32-bit word back into the original vector. return InsertWord(VecV, Ins, ByteIdx); } SDValue HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV); SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG); return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV); } SDValue HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); unsigned Idx = cast(IdxV.getNode())->getZExtValue(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); // If the source vector is a vector pair, get the single vector containing // the subvector of interest. The subvector will never overlap two single // vectors. if (isHvxPairTy(VecTy)) { unsigned SubIdx; if (Idx * ElemWidth >= 8*HwLen) { SubIdx = Hexagon::vsub_hi; Idx -= VecTy.getVectorNumElements() / 2; } else { SubIdx = Hexagon::vsub_lo; } VecTy = typeSplit(VecTy).first; VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV); if (VecTy == ResTy) return VecV; } // The only meaningful subvectors of a single HVX vector are those that // fit in a scalar register. assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); MVT WordTy = tyVector(VecTy, MVT::i32); SDValue WordVec = DAG.getBitcast(WordTy, VecV); unsigned WordIdx = (Idx*ElemWidth) / 32; SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32); SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG); if (ResTy.getSizeInBits() == 32) return DAG.getBitcast(ResTy, W0); SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32); SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG); SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0}); return DAG.getBitcast(ResTy, WW); } SDValue HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); // IdxV is required to be a constant. unsigned Idx = cast(IdxV.getNode())->getZExtValue(); unsigned ResLen = ResTy.getVectorNumElements(); unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); unsigned Offset = Idx * BitBytes; SDValue Undef = DAG.getUNDEF(ByteTy); SmallVector Mask; if (Subtarget.isHVXVectorType(ResTy, true)) { // Converting between two vector predicates. Since the result is shorter // than the source, it will correspond to a vector predicate with the // relevant bits replicated. The replication count is the ratio of the // source and target vector lengths. unsigned Rep = VecTy.getVectorNumElements() / ResLen; assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); for (unsigned i = 0; i != HwLen/Rep; ++i) { for (unsigned j = 0; j != Rep; ++j) Mask.push_back(i + Offset); } SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV); } // Converting between a vector predicate and a scalar predicate. In the // vector predicate, a group of BitBytes bits will correspond to a single // i1 element of the source vector type. Those bits will all have the same // value. The same will be true for ByteVec, where each byte corresponds // to a bit in the vector predicate. // The algorithm is to traverse the ByteVec, going over the i1 values from // the source vector, and generate the corresponding representation in an // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the // elements so that the interesting 8 bytes will be in the low end of the // vector. unsigned Rep = 8 / ResLen; // Make sure the output fill the entire vector register, so repeat the // 8-byte groups as many times as necessary. for (unsigned r = 0; r != HwLen/ResLen; ++r) { // This will generate the indexes of the 8 interesting bytes. for (unsigned i = 0; i != ResLen; ++i) { for (unsigned j = 0; j != Rep; ++j) Mask.push_back(Offset + i*BitBytes); } } SDValue Zero = getZero(dl, MVT::i32, DAG); SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); // Combine the two low words from ShuffV into a v8i8, and byte-compare // them against 0. SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); } SDValue HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); MVT SubTy = ty(SubV); unsigned HwLen = Subtarget.getVectorLength(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); bool IsPair = isHvxPairTy(VecTy); MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); // The two single vectors that VecV consists of, if it's a pair. SDValue V0, V1; SDValue SingleV = VecV; SDValue PickHi; if (IsPair) { V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), dl, MVT::i32); PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); if (isHvxSingleTy(SubTy)) { if (const auto *CN = dyn_cast(IdxV.getNode())) { unsigned Idx = CN->getZExtValue(); assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); } // If IdxV is not a constant, generate the two variants: with the // SubV as the high and as the low subregister, and select the right // pair based on the IdxV. SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); } // The subvector being inserted must be entirely contained in one of // the vectors V0 or V1. Set SingleV to the correct one, and update // IdxV to be the index relative to the beginning of that vector. SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); } // The only meaningful subvectors of a single HVX vector are those that // fit in a scalar register. assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); // Convert IdxV to be index in bytes. auto *IdxN = dyn_cast(IdxV.getNode()); if (!IdxN || !IdxN->isNullValue()) { IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, DAG.getConstant(ElemWidth/8, dl, MVT::i32)); SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); } // When inserting a single word, the rotation back to the original position // would be by HwLen-Idx, but if two words are inserted, it will need to be // by (HwLen-4)-Idx. unsigned RolBase = HwLen; if (VecTy.getSizeInBits() == 32) { SDValue V = DAG.getBitcast(MVT::i32, SubV); SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); } else { SDValue V = DAG.getBitcast(MVT::i64, SubV); SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, DAG.getConstant(4, dl, MVT::i32)); SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); RolBase = HwLen-4; } // If the vector wasn't ror'ed, don't ror it back. if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) { SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(RolBase, dl, MVT::i32), IdxV); SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); } if (IsPair) { SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); } return SingleV; } SDValue HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); MVT SubTy = ty(SubV); assert(Subtarget.isHVXVectorType(VecTy, true)); // VecV is an HVX vector predicate. SubV may be either an HVX vector // predicate as well, or it can be a scalar predicate. unsigned VecLen = VecTy.getVectorNumElements(); unsigned HwLen = Subtarget.getVectorLength(); assert(HwLen % VecLen == 0 && "Unexpected vector type"); unsigned Scale = VecLen / SubTy.getVectorNumElements(); unsigned BitBytes = HwLen / VecLen; unsigned BlockLen = HwLen / Scale; MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG); SDValue ByteIdx; auto *IdxN = dyn_cast(IdxV.getNode()); if (!IdxN || !IdxN->isNullValue()) { ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, DAG.getConstant(BitBytes, dl, MVT::i32)); ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx); } // ByteVec is the target vector VecV rotated in such a way that the // subvector should be inserted at index 0. Generate a predicate mask // and use vmux to do the insertion. MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG); // Rotate ByteVec back, and convert to a vector predicate. if (!IdxN || !IdxN->isNullValue()) { SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32); SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx); ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi); } return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); } SDValue HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { // Sign- and any-extending of a vector predicate to a vector register is // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and // a vector of 1s (where the 1s are of type matching the vector type). assert(Subtarget.isHVXVectorType(ResTy)); if (!ZeroExt) return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV); assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); SDValue True = DAG.getNode(HexagonISD::VSPLAT, dl, ResTy, DAG.getConstant(1, dl, MVT::i32)); SDValue False = getZero(dl, ResTy, DAG); return DAG.getSelect(dl, ResTy, VecV, True, False); } SDValue HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1] // (i.e. the entire predicate register) to bits [0..HwLen-1] of a // vector register. The remaining bits of the vector register are // unspecified. MachineFunction &MF = DAG.getMachineFunction(); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); MVT PredTy = ty(VecQ); unsigned PredLen = PredTy.getVectorNumElements(); assert(HwLen % PredLen == 0); MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen); Type *Int8Ty = Type::getInt8Ty(*DAG.getContext()); SmallVector Tmp; // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,... // These are bytes with the LSB rotated left with respect to their index. for (unsigned i = 0; i != HwLen/8; ++i) { for (unsigned j = 0; j != 8; ++j) Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j)); } Constant *CV = ConstantVector::get(Tmp); Align Alignment(HwLen); SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG); SDValue Bytes = DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(MF), Alignment); // Select the bytes that correspond to true bits in the vector predicate. SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes), getZero(dl, VecTy, DAG)); // Calculate the OR of all bytes in each group of 8. That will compress // all the individual bits into a single byte. // First, OR groups of 4, via vrmpy with 0x01010101. SDValue All1 = DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32)); SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG); // Then rotate the accumulated vector by 4 bytes, and do the final OR. SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy, {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG); SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot}); // Pick every 8th byte and coalesce them at the beginning of the output. // For symmetry, coalesce every 1+8th byte after that, then every 2+8th // byte and so on. SmallVector Mask; for (unsigned i = 0; i != HwLen; ++i) Mask.push_back((8*i) % HwLen + i/(HwLen/8)); SDValue Collect = DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask); return DAG.getBitcast(ResTy, Collect); } SDValue HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); MVT VecTy = ty(Op); unsigned Size = Op.getNumOperands(); SmallVector Ops; for (unsigned i = 0; i != Size; ++i) Ops.push_back(Op.getOperand(i)); if (VecTy.getVectorElementType() == MVT::i1) return buildHvxVectorPred(Ops, dl, VecTy, DAG); if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { ArrayRef A(Ops); MVT SingleTy = typeSplit(VecTy).first; SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); } return buildHvxVectorReg(Ops, dl, VecTy, DAG); } SDValue HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const { // Vector concatenation of two integer (non-bool) vectors does not need // special lowering. Custom-lower concats of bool vectors and expand // concats of more than 2 vectors. MVT VecTy = ty(Op); const SDLoc &dl(Op); unsigned NumOp = Op.getNumOperands(); if (VecTy.getVectorElementType() != MVT::i1) { if (NumOp == 2) return Op; // Expand the other cases into a build-vector. SmallVector Elems; for (SDValue V : Op.getNode()->ops()) DAG.ExtractVectorElements(V, Elems); // A vector of i16 will be broken up into a build_vector of i16's. // This is a problem, since at the time of operation legalization, // all operations are expected to be type-legalized, and i16 is not // a legal type. If any of the extracted elements is not of a valid // type, sign-extend it to a valid one. for (unsigned i = 0, e = Elems.size(); i != e; ++i) { SDValue V = Elems[i]; MVT Ty = ty(V); if (!isTypeLegal(Ty)) { EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty); if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy, V.getOperand(0), V.getOperand(1)), DAG.getValueType(Ty)); continue; } // A few less complicated cases. if (V.getOpcode() == ISD::Constant) Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy); else if (V.isUndef()) Elems[i] = DAG.getUNDEF(NTy); else llvm_unreachable("Unexpected vector element"); } } return DAG.getBuildVector(VecTy, dl, Elems); } assert(VecTy.getVectorElementType() == MVT::i1); unsigned HwLen = Subtarget.getVectorLength(); assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); SDValue Op0 = Op.getOperand(0); // If the operands are HVX types (i.e. not scalar predicates), then // defer the concatenation, and create QCAT instead. if (Subtarget.isHVXVectorType(ty(Op0), true)) { if (NumOp == 2) return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1)); ArrayRef U(Op.getNode()->ops()); SmallVector SV(U.begin(), U.end()); ArrayRef Ops(SV); MVT HalfTy = typeSplit(VecTy).first; SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, Ops.take_front(NumOp/2)); SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, Ops.take_back(NumOp/2)); return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1); } // Count how many bytes (in a vector register) each bit in VecTy // corresponds to. unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); SmallVector Prefixes; for (SDValue V : Op.getNode()->op_values()) { SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG); Prefixes.push_back(P); } unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32); SDValue Res = getZero(dl, ByteTy, DAG); for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) { Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S); Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]); } return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res); } SDValue HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const { // Change the type of the extracted element to i32. SDValue VecV = Op.getOperand(0); MVT ElemTy = ty(VecV).getVectorElementType(); const SDLoc &dl(Op); SDValue IdxV = Op.getOperand(1); if (ElemTy == MVT::i1) return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG); return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG); } SDValue HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); SDValue VecV = Op.getOperand(0); SDValue ValV = Op.getOperand(1); SDValue IdxV = Op.getOperand(2); MVT ElemTy = ty(VecV).getVectorElementType(); if (ElemTy == MVT::i1) return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); } SDValue HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const { SDValue SrcV = Op.getOperand(0); MVT SrcTy = ty(SrcV); MVT DstTy = ty(Op); SDValue IdxV = Op.getOperand(1); unsigned Idx = cast(IdxV.getNode())->getZExtValue(); assert(Idx % DstTy.getVectorNumElements() == 0); (void)Idx; const SDLoc &dl(Op); MVT ElemTy = SrcTy.getVectorElementType(); if (ElemTy == MVT::i1) return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG); return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG); } SDValue HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const { // Idx does not need to be a constant. SDValue VecV = Op.getOperand(0); SDValue ValV = Op.getOperand(1); SDValue IdxV = Op.getOperand(2); const SDLoc &dl(Op); MVT VecTy = ty(VecV); MVT ElemTy = VecTy.getVectorElementType(); if (ElemTy == MVT::i1) return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG); return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG); } SDValue HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { // Lower any-extends of boolean vectors to sign-extends, since they // translate directly to Q2V. Zero-extending could also be done equally // fast, but Q2V is used/recognized in more places. // For all other vectors, use zero-extend. MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); MVT ElemTy = ty(InpV).getVectorElementType(); if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) return LowerHvxSignExt(Op, DAG); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV); } SDValue HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); MVT ElemTy = ty(InpV).getVectorElementType(); if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG); return Op; } SDValue HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); MVT ElemTy = ty(InpV).getVectorElementType(); if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG); return Op; } SDValue HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) const SDLoc &dl(Op); MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); assert(ResTy == ty(InpV)); // Calculate the vectors of 1 and bitwidth(x). MVT ElemTy = ty(InpV).getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); // Using uint64_t because a shift by 32 can happen. uint64_t Splat1 = 0, SplatW = 0; assert(isPowerOf2_32(ElemWidth) && ElemWidth <= 32); for (unsigned i = 0; i != 32/ElemWidth; ++i) { Splat1 = (Splat1 << ElemWidth) | 1; SplatW = (SplatW << ElemWidth) | ElemWidth; } SDValue Vec1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, DAG.getConstant(uint32_t(Splat1), dl, MVT::i32)); SDValue VecW = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, DAG.getConstant(uint32_t(SplatW), dl, MVT::i32)); SDValue VecN1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, DAG.getConstant(-1, dl, MVT::i32)); // Do not use DAG.getNOT, because that would create BUILD_VECTOR with // a BITCAST. Here we can skip the BITCAST (so we don't have to handle // it separately in custom combine or selection). SDValue A = DAG.getNode(ISD::AND, dl, ResTy, {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}), DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})}); return DAG.getNode(ISD::SUB, dl, ResTy, {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)}); } SDValue HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); assert(ResTy.isVector() && isHvxSingleTy(ResTy)); const SDLoc &dl(Op); SmallVector ShuffMask; MVT ElemTy = ResTy.getVectorElementType(); unsigned VecLen = ResTy.getVectorNumElements(); SDValue Vs = Op.getOperand(0); SDValue Vt = Op.getOperand(1); switch (ElemTy.SimpleTy) { case MVT::i8: { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). MVT ExtTy = typeExtElem(ResTy, 2); unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyhv; SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); // Discard high halves of the resulting values, collect the low halves. for (unsigned I = 0; I < VecLen; I += 2) { ShuffMask.push_back(I); // Pick even element. ShuffMask.push_back(I+VecLen); // Pick odd element. } VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); return DAG.getBitcast(ResTy, BS); } case MVT::i16: // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode. // (There is also V6_vmpyhv, which behaves in an analogous way to // V6_vmpybv.) return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG); case MVT::i32: { // Use the following sequence for signed word multiply: // T0 = V6_vmpyiowh Vs, Vt // T1 = V6_vaslw T0, 16 // T2 = V6_vmpyiewuh_acc T1, Vs, Vt SDValue S16 = DAG.getConstant(16, dl, MVT::i32); SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG); SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG); SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, {T1, Vs, Vt}, DAG); return T2; } default: break; } return SDValue(); } SDValue HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); assert(ResTy.isVector()); const SDLoc &dl(Op); SmallVector ShuffMask; MVT ElemTy = ResTy.getVectorElementType(); unsigned VecLen = ResTy.getVectorNumElements(); SDValue Vs = Op.getOperand(0); SDValue Vt = Op.getOperand(1); bool IsSigned = Op.getOpcode() == ISD::MULHS; if (ElemTy == MVT::i8 || ElemTy == MVT::i16) { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). // For i16, use V6_vmpyhv, which behaves in an analogous way to // V6_vmpybv: results Lo and Hi are products of even/odd elements // respectively. MVT ExtTy = typeExtElem(ResTy, 2); unsigned MpyOpc = ElemTy == MVT::i8 ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv) : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv); SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); // Discard low halves of the resulting values, collect the high halves. for (unsigned I = 0; I < VecLen; I += 2) { ShuffMask.push_back(I+1); // Pick even element. ShuffMask.push_back(I+VecLen+1); // Pick odd element. } VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); return DAG.getBitcast(ResTy, BS); } assert(ElemTy == MVT::i32); SDValue S16 = DAG.getConstant(16, dl, MVT::i32); if (IsSigned) { // mulhs(Vs,Vt) = // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32 // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16 // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32 // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16 // + Lo(Vs) *us Vt] >> 32 // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to // anything, so it cannot produce any carry over to higher bits), // so everything in [] can be shifted by 16 without loss of precision. // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16 // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16 // Denote Hi(Vs) = Vs': // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16 // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG); // Get Vs': SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG); SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, {T0, S0, Vt}, DAG); // Shift by 16: SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG); // Get Vs'*Hi(Vt): SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG); // Add: SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2}); return T3; } // Unsigned mulhw. (Would expansion using signed mulhw be better?) auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) { return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair); }; auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) { return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair); }; MVT PairTy = typeJoin({ResTy, ResTy}); SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy, {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG); // Multiply-unsigned halfwords: // LoVec = Vs.uh[2i] * Vt.uh[2i], // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1] SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG); // The low halves in the LoVec of the pair can be discarded. They are // not added to anything (in the full-precision product), so they cannot // produce a carry into the higher bits. SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG); // Swap low and high halves in Vt, and do the halfword multiplication // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i]. SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG); SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG); // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs). // These products are words, but cannot be added directly because the // sums could overflow. Add these products, by halfwords, where each sum // of a pair of halfwords gives a word. SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {LoVec(T2), HiVec(T2)}, DAG); // Add the high halfwords from the products of the low halfwords. SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)}); SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG); SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)}); SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6}); return T7; } SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { SDValue ValQ = Op.getOperand(0); MVT ResTy = ty(Op); MVT VecTy = ty(ValQ); const SDLoc &dl(Op); if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) { unsigned HwLen = Subtarget.getVectorLength(); MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4); SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG); unsigned BitWidth = ResTy.getSizeInBits(); if (BitWidth < 64) { SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32), dl, MVT::i32, DAG); if (BitWidth == 32) return W0; assert(BitWidth < 32u); return DAG.getZExtOrTrunc(W0, dl, ResTy); } // The result is >= 64 bits. The only options are 64 or 128. assert(BitWidth == 64 || BitWidth == 128); SmallVector Words; for (unsigned i = 0; i != BitWidth/32; ++i) { SDValue W = extractHvxElementReg( VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG); Words.push_back(W); } SmallVector Combines; assert(Words.size() % 2 == 0); for (unsigned i = 0, e = Words.size(); i < e; i += 2) { SDValue C = DAG.getNode( HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]}); Combines.push_back(C); } if (BitWidth == 64) return Combines[0]; return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines); } return Op; } SDValue HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { // Sign- and zero-extends are legal. assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op), Op.getOperand(0)); } SDValue HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { if (SDValue S = getVectorShiftByInt(Op, DAG)) return S; return Op; } SDValue HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); MVT ResTy = ty(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); bool Use64b = Subtarget.useHVX64BOps(); unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast : Intrinsic::hexagon_V6_pred_typecast_128B; if (IntNo == IntPredCast) { SDValue Vs = Op.getOperand(1); MVT OpTy = ty(Vs); if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) { if (ResTy == OpTy) return Vs; return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs); } } return Op; } SDValue HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const { assert(!Op.isMachineOpcode()); SmallVector OpsL, OpsH; const SDLoc &dl(Op); auto SplitVTNode = [&DAG,this] (const VTSDNode *N) { MVT Ty = typeSplit(N->getVT().getSimpleVT()).first; SDValue TV = DAG.getValueType(Ty); return std::make_pair(TV, TV); }; for (SDValue A : Op.getNode()->ops()) { VectorPair P = Subtarget.isHVXVectorType(ty(A), true) ? opSplit(A, dl, DAG) : std::make_pair(A, A); // Special case for type operand. if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { if (const auto *N = dyn_cast(A.getNode())) P = SplitVTNode(N); } OpsL.push_back(P.first); OpsH.push_back(P.second); } MVT ResTy = ty(Op); MVT HalfTy = typeSplit(ResTy).first; SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL); SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH); SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H); return S; } SDValue HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { LSBaseSDNode *BN = cast(Op.getNode()); assert(BN->isUnindexed()); MVT MemTy = BN->getMemoryVT().getSimpleVT(); if (!isHvxPairTy(MemTy)) return Op; const SDLoc &dl(Op); unsigned HwLen = Subtarget.getVectorLength(); MVT SingleTy = typeSplit(MemTy).first; SDValue Chain = BN->getChain(); SDValue Base0 = BN->getBasePtr(); SDValue Base1 = DAG.getMemBasePlusOffset(Base0, HwLen, dl); MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; if (MachineMemOperand *MMO = BN->getMemOperand()) { MachineFunction &MF = DAG.getMachineFunction(); MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen); MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen); } unsigned MemOpc = BN->getOpcode(); SDValue NewOp; if (MemOpc == ISD::LOAD) { SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1); NewOp = DAG.getMergeValues( { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1), DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load0.getValue(1), Load1.getValue(1)) }, dl); } else { assert(MemOpc == ISD::STORE); VectorPair Vals = opSplit(cast(Op)->getValue(), dl, DAG); SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0); SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1); NewOp = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1); } return NewOp; } SDValue HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); bool IsPairOp = isHvxPairTy(ty(Op)) || llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) { return isHvxPairTy(ty(V)); }); if (IsPairOp) { switch (Opc) { default: break; case ISD::LOAD: case ISD::STORE: return SplitHvxMemOp(Op, DAG); case ISD::CTPOP: case ISD::CTLZ: case ISD::CTTZ: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: case ISD::AND: case ISD::OR: case ISD::XOR: case ISD::SRA: case ISD::SHL: case ISD::SRL: case ISD::SETCC: case ISD::VSELECT: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND_INREG: return SplitHvxPairOp(Op, DAG); } } switch (Opc) { default: break; case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); case ISD::CTTZ: return LowerHvxCttz(Op, DAG); case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerHvxShift(Op, DAG); case ISD::MUL: return LowerHvxMul(Op, DAG); case ISD::MULHS: case ISD::MULHU: return LowerHvxMulh(Op, DAG); case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); case ISD::SETCC: case ISD::INTRINSIC_VOID: return Op; case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); // Unaligned loads will be handled by the default lowering. case ISD::LOAD: return SDValue(); } #ifndef NDEBUG Op.dumpr(&DAG); #endif llvm_unreachable("Unhandled HVX operation"); } void HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { } void HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { unsigned Opc = N->getOpcode(); switch (Opc) { case ISD::BITCAST: if (isHvxBoolTy(ty(N->getOperand(0)))) { SDValue Op(N, 0); SDValue C = LowerHvxBitcast(Op, DAG); Results.push_back(C); } break; default: break; } } SDValue HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const SDLoc &dl(N); SDValue Op(N, 0); unsigned Opc = Op.getOpcode(); if (Opc == ISD::VSELECT) { // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) SDValue Cond = Op.getOperand(0); if (Cond->getOpcode() == ISD::XOR) { SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); if (C1->getOpcode() == HexagonISD::QTRUE) { SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Op.getOperand(2), Op.getOperand(1)); return VSel; } } } return SDValue(); } bool HexagonTargetLowering::isHvxOperation(SDValue Op) const { // If the type of the result, or any operand type are HVX vector types, // this is an HVX operation. return Subtarget.isHVXVectorType(ty(Op), true) || llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) { return Subtarget.isHVXVectorType(ty(V), true); }); } bool HexagonTargetLowering::isHvxOperation(SDNode *N) const { // If the type of any result, or any operand type are HVX vector types, // this is an HVX operation. auto IsHvxTy = [this] (EVT Ty) { return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); }; auto IsHvxOp = [this] (SDValue Op) { return Subtarget.isHVXVectorType(ty(Op), true); }; return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp); }