Pull in r211435 from upstream llvm trunk (by Benjamin Kramer):

  Legalizer: Add support for splitting insert_subvectors.

  We handle this by spilling the whole thing to the stack and doing the
  insertion as a store.

  PR19492. This happens in real code because the vectorizer creates
  v2i128 when AVX is enabled.

This fixes a "fatal error: error in backend: Do not know how to split
the result of this operator!" message encountered during compilation of
the net-p2p/libtorrent-rasterbar port.

Introduced here: http://svnweb.freebsd.org/changeset/base/267704

Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -569,6 +569,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -506,6 +506,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N
   case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
   case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
   case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::INSERT_SUBVECTOR:  SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
   case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -725,6 +726,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECT
                                    TLI.getVectorIdxTy()));
 }
 
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDValue Vec = N->getOperand(0);
+  SDValue SubVec = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+  SDLoc dl(N);
+  GetSplitVector(Vec, Lo, Hi);
+
+  // Spill the vector to the stack.
+  EVT VecVT = Vec.getValueType();
+  EVT SubVecVT = VecVT.getVectorElementType();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+  // Store the new subvector into the specified index.
+  SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+  Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+  unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
+                       false, false, 0);
+
+  // Load the Lo part from the stack slot.
+  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, false, 0);
+
+  // Increment the pointer to the other part.
+  unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+  StackPtr =
+      DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                  DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+
+  // Load the Hi part from the stack slot.
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
                                          SDValue &Hi) {
   SDLoc dl(N);
Index: test/CodeGen/X86/vec_split.ll
===================================================================
--- test/CodeGen/X86/vec_split.ll
+++ test/CodeGen/X86/vec_split.ll
@@ -40,3 +40,36 @@ define <32 x i16> @split32(<32 x i16> %a, <32 x i1
   %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
   ret <32 x i16> %2
 }
+
+; PR19492
+define i128 @split128(<2 x i128> %a, <2 x i128> %b) {
+; SSE4-LABEL: split128:
+; SSE4: addq
+; SSE4: adcq
+; SSE4: addq
+; SSE4: adcq
+; SSE4: addq
+; SSE4: adcq
+; SSE4: ret
+; AVX1-LABEL: split128:
+; AVX1: addq
+; AVX1: adcq
+; AVX1: addq
+; AVX1: adcq
+; AVX1: addq
+; AVX1: adcq
+; AVX1: ret
+; AVX2-LABEL: split128:
+; AVX2: addq
+; AVX2: adcq
+; AVX2: addq
+; AVX2: adcq
+; AVX2: addq
+; AVX2: adcq
+; AVX2: ret
+  %add = add nsw <2 x i128> %a, %b
+  %rdx.shuf = shufflevector <2 x i128> %add, <2 x i128> undef, <2 x i32> <i32 undef, i32 0>
+  %bin.rdx = add <2 x i128> %add, %rdx.shuf
+  %e = extractelement <2 x i128> %bin.rdx, i32 1
+  ret i128 %e
+}