contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  11 // both before and after the DAG is legalized.
  12 //
  13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  14 // primarily intended to handle simplification opportunities that are implicit
  15 // in the LLVM IR and exposed by the various codegen lowering phases.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #include "llvm/ADT/SetVector.h"
  20 #include "llvm/ADT/SmallBitVector.h"
  21 #include "llvm/ADT/SmallPtrSet.h"
  22 #include "llvm/ADT/SmallSet.h"
  23 #include "llvm/ADT/Statistic.h"
  24 #include "llvm/Analysis/AliasAnalysis.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineFunction.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  29 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  30 #include "llvm/IR/DataLayout.h"
  31 #include "llvm/IR/DerivedTypes.h"
  32 #include "llvm/IR/Function.h"
  33 #include "llvm/IR/LLVMContext.h"
  34 #include "llvm/Support/CommandLine.h"
  35 #include "llvm/Support/Debug.h"
  36 #include "llvm/Support/ErrorHandling.h"
  37 #include "llvm/Support/KnownBits.h"
  38 #include "llvm/Support/MathExtras.h"
  39 #include "llvm/Support/raw_ostream.h"
  40 #include "llvm/Target/TargetLowering.h"
  41 #include "llvm/Target/TargetOptions.h"
  42 #include "llvm/Target/TargetRegisterInfo.h"
  43 #include "llvm/Target/TargetSubtargetInfo.h"
  44 #include <algorithm>
  45 using namespace llvm;
  46
  47 #define DEBUG_TYPE "dagcombine"
  48
  49 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  50 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  51 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  52 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  53 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  54 STATISTIC(SlicedLoads, "Number of load sliced");
  55
  56 namespace {
  57   static cl::opt<bool> // Hidden flag; no explicit cl::init default is given.
  58     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  59                cl::desc("Enable DAG combiner's use of IR alias analysis"));
  60
       /// Hidden flag controlling the DAG combiner's use of TBAA; initialized to
       /// true.
  61   static cl::opt<bool>
  62     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  63                cl::desc("Enable DAG combiner's use of TBAA"));
  64
  65 #ifndef NDEBUG
       /// Hidden string option, declared only when NDEBUG is not defined, naming
       /// the one function in which DAG-combiner alias analysis is used.
  66   static cl::opt<std::string>
  67     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  68                cl::desc("Only use DAG-combiner alias analysis in this"
  69                         " function"));
  70 #endif
  71
  72   /// Hidden option to stress test load slicing, i.e., when this option
  73   /// is enabled, load slicing bypasses most of its profitability guards.
  74   static cl::opt<bool>
  75   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
  76                     cl::desc("Bypass the profitability model of load "
  77                              "slicing"),
  78                     cl::init(false));
  79
       /// Hidden flag, initialized to true, that lets the DAG combiner split
       /// indexing from loads.
  80   static cl::opt<bool>
  81     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
  82                       cl::desc("DAG combiner may split indexing from loads"));
  83
  84 //------------------------------ DAGCombiner ---------------------------------//
  85
  86   class DAGCombiner {
  87     SelectionDAG &DAG;
  88     const TargetLowering &TLI;
  89     CombineLevel Level;
  90     CodeGenOpt::Level OptLevel;
  91     bool LegalOperations;
  92     bool LegalTypes;
  93     bool ForCodeSize;
  94
  95     /// \brief Worklist of all of the nodes that need to be simplified.
  96     ///
  97     /// This must behave as a stack -- new nodes to process are pushed onto the
  98     /// back and when processing we pop off of the back.
  99     ///
 100     /// The worklist will not contain duplicates but may contain null entries
 101     /// due to nodes being deleted from the underlying DAG.
 102     SmallVector<SDNode *, 64> Worklist;
 103
 104     /// \brief Mapping from an SDNode to its position on the worklist.
 105     ///
 106     /// This is used to find and remove nodes from the worklist (by nulling
 107     /// them) when they are deleted from the underlying DAG. It relies on
 108     /// stable indices of nodes within the worklist.
 109     DenseMap<SDNode *, unsigned> WorklistMap;
 110
 111     /// \brief Set of nodes which have been combined (at least once).
 112     ///
 113     /// This is used to allow us to reliably add any operands of a DAG node
 114     /// which have not yet been combined to the worklist.
 115     SmallPtrSet<SDNode *, 32> CombinedNodes;
 116
 117     // AA - Used for DAG load/store alias analysis.
 118     AliasAnalysis *AA;
 119
 120     /// When an instruction is simplified, add all users of the instruction to
 121     /// the work lists because they might get more simplified now.
 122     void AddUsersToWorklist(SDNode *N) {
 123       // Each user of N is handed to AddToWorklist for another visit.
 124       for (SDNode *User : N->uses()) {
 125         AddToWorklist(User);
           }
         }
 126
 127     /// Call the node-specific routine that folds each particular type of node.
 128     SDValue visit(SDNode *N);
 129
 130   public:
 131     /// Add N to the back of the worklist (next to be processed) unless it is
 132     /// already queued, in which case it keeps its current position.
 133     void AddToWorklist(SDNode *N) {
 134       const unsigned Opcode = N->getOpcode();
 135       assert(Opcode != ISD::DELETED_NODE && "Deleted Node added to Worklist");
 136
 137       // Handle nodes can't usefully be combined and would confuse the zero-use
 138       // deletion strategy, so they never enter the worklist.
 139       if (Opcode == ISD::HANDLENODE)
 140         return;
 141
 142       // The map insertion only succeeds for a node that is not queued yet; an
 143       // already-queued node keeps its existing slot and nothing is pushed.
 144       const bool NewlyQueued =
               WorklistMap.insert(std::make_pair(N, Worklist.size())).second;
           if (NewlyQueued)
             Worklist.push_back(N);
         }
 145
 146     /// Remove all instances of N from the worklist.
 147     void removeFromWorklist(SDNode *N) {
 148       // N also stops counting as an already-combined node.
 149       CombinedNodes.erase(N);
 150
 151       auto Slot = WorklistMap.find(N);
 152       if (Slot != WorklistMap.end()) {
 153         // Clear the worklist entry in place instead of erasing it, which
 154         // avoids a linear operation on the vector.
 155         Worklist[Slot->second] = nullptr;
 156         WorklistMap.erase(Slot);
 157       }
         }
 158
 159     void deleteAndRecombine(SDNode *N);
 160     bool recursivelyDeleteUnusedNodes(SDNode *N);
 161
 162     /// Replaces all uses of the results of one DAG node with new values.
 163     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 164                       bool AddTo = true);
 165
 166     /// Replaces all uses of the results of one DAG node with new values.
 167     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 168       // Treat the lone replacement value as a one-element array.
 169       const SDValue *Replacement = &Res;
           return CombineTo(N, Replacement, 1, AddTo);
         }
 170
 171     /// Replaces all uses of the results of one DAG node with new values.
 172     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 173                       bool AddTo = true) {
 174       // Pack both replacement values into an array for the pointer/count form.
 175       const SDValue Replacements[2] = {Res0, Res1};
 176       return CombineTo(N, Replacements, 2, AddTo);
         }
 177
 178     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 179
 180   private:
 181     unsigned MaximumLegalStoreInBits;
 182
 183     /// Check the specified integer node value to see if it can be simplified or
 184     /// if things it uses can be simplified by bit propagation.
 185     /// If so, return true.
 186     bool SimplifyDemandedBits(SDValue Op) {
 187       // Demand every bit of Op's scalar value.
 188       return SimplifyDemandedBits(
 189           Op, APInt::getAllOnesValue(Op.getScalarValueSizeInBits()));
 190     }
 191
 192     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
 193
 194     bool CombineToPreIndexedLoadStore(SDNode *N);
 195     bool CombineToPostIndexedLoadStore(SDNode *N);
 196     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 197     bool SliceUpLoad(SDNode *N);
 198
 199     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 200     ///   load.
 201     ///
 202     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 203     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 204     /// \param EltNo index of the vector element to load.
 205     /// \param OriginalLoad load that EVE came from to be replaced.
 206     /// \returns EVE on success SDValue() on failure.
 207     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
 208         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
 209     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 210     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 211     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 212     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 213     SDValue PromoteIntBinOp(SDValue Op);
 214     SDValue PromoteIntShiftOp(SDValue Op);
 215     SDValue PromoteExtend(SDValue Op);
 216     bool PromoteLoad(SDValue Op);
 217
 218     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
 219                          SDValue ExtLoad, const SDLoc &DL,
 220                          ISD::NodeType ExtType);
 221
 222     /// Call the node-specific routine that knows how to fold each
 223     /// particular type of node. If that doesn't do anything, try the
 224     /// target-specific DAG combines.
 225     SDValue combine(SDNode *N);
 226
 227     // Visitation implementation - Implement dag node combining for different
 228     // node types.  The semantics are as follows:
 229     // Return Value:
 230     //   SDValue.getNode() == 0 - No change was made
 231     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 232     //   otherwise              - N should be replaced by the returned Operand.
 233     //
 234     SDValue visitTokenFactor(SDNode *N);
 235     SDValue visitMERGE_VALUES(SDNode *N);
 236     SDValue visitADD(SDNode *N);
 237     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
 238     SDValue visitSUB(SDNode *N);
 239     SDValue visitADDC(SDNode *N);
 240     SDValue visitUADDO(SDNode *N);
 241     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 242     SDValue visitSUBC(SDNode *N);
 243     SDValue visitUSUBO(SDNode *N);
 244     SDValue visitADDE(SDNode *N);
 245     SDValue visitADDCARRY(SDNode *N);
 246     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 247     SDValue visitSUBE(SDNode *N);
 248     SDValue visitSUBCARRY(SDNode *N);
 249     SDValue visitMUL(SDNode *N);
 250     SDValue useDivRem(SDNode *N);
 251     SDValue visitSDIV(SDNode *N);
 252     SDValue visitUDIV(SDNode *N);
 253     SDValue visitREM(SDNode *N);
 254     SDValue visitMULHU(SDNode *N);
 255     SDValue visitMULHS(SDNode *N);
 256     SDValue visitSMUL_LOHI(SDNode *N);
 257     SDValue visitUMUL_LOHI(SDNode *N);
 258     SDValue visitSMULO(SDNode *N);
 259     SDValue visitUMULO(SDNode *N);
 260     SDValue visitIMINMAX(SDNode *N);
 261     SDValue visitAND(SDNode *N);
 262     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
 263     SDValue visitOR(SDNode *N);
 264     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
 265     SDValue visitXOR(SDNode *N);
 266     SDValue SimplifyVBinOp(SDNode *N);
 267     SDValue visitSHL(SDNode *N);
 268     SDValue visitSRA(SDNode *N);
 269     SDValue visitSRL(SDNode *N);
 270     SDValue visitRotate(SDNode *N);
 271     SDValue visitABS(SDNode *N);
 272     SDValue visitBSWAP(SDNode *N);
 273     SDValue visitBITREVERSE(SDNode *N);
 274     SDValue visitCTLZ(SDNode *N);
 275     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 276     SDValue visitCTTZ(SDNode *N);
 277     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 278     SDValue visitCTPOP(SDNode *N);
 279     SDValue visitSELECT(SDNode *N);
 280     SDValue visitVSELECT(SDNode *N);
 281     SDValue visitSELECT_CC(SDNode *N);
 282     SDValue visitSETCC(SDNode *N);
 283     SDValue visitSETCCE(SDNode *N);
 284     SDValue visitSETCCCARRY(SDNode *N);
 285     SDValue visitSIGN_EXTEND(SDNode *N);
 286     SDValue visitZERO_EXTEND(SDNode *N);
 287     SDValue visitANY_EXTEND(SDNode *N);
 288     SDValue visitAssertZext(SDNode *N);
 289     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 290     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 291     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 292     SDValue visitTRUNCATE(SDNode *N);
 293     SDValue visitBITCAST(SDNode *N);
 294     SDValue visitBUILD_PAIR(SDNode *N);
 295     SDValue visitFADD(SDNode *N);
 296     SDValue visitFSUB(SDNode *N);
 297     SDValue visitFMUL(SDNode *N);
 298     SDValue visitFMA(SDNode *N);
 299     SDValue visitFDIV(SDNode *N);
 300     SDValue visitFREM(SDNode *N);
 301     SDValue visitFSQRT(SDNode *N);
 302     SDValue visitFCOPYSIGN(SDNode *N);
 303     SDValue visitSINT_TO_FP(SDNode *N);
 304     SDValue visitUINT_TO_FP(SDNode *N);
 305     SDValue visitFP_TO_SINT(SDNode *N);
 306     SDValue visitFP_TO_UINT(SDNode *N);
 307     SDValue visitFP_ROUND(SDNode *N);
 308     SDValue visitFP_ROUND_INREG(SDNode *N);
 309     SDValue visitFP_EXTEND(SDNode *N);
 310     SDValue visitFNEG(SDNode *N);
 311     SDValue visitFABS(SDNode *N);
 312     SDValue visitFCEIL(SDNode *N);
 313     SDValue visitFTRUNC(SDNode *N);
 314     SDValue visitFFLOOR(SDNode *N);
 315     SDValue visitFMINNUM(SDNode *N);
 316     SDValue visitFMAXNUM(SDNode *N);
 317     SDValue visitBRCOND(SDNode *N);
 318     SDValue visitBR_CC(SDNode *N);
 319     SDValue visitLOAD(SDNode *N);
 320
 321     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 322     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 323
 324     SDValue visitSTORE(SDNode *N);
 325     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 326     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 327     SDValue visitBUILD_VECTOR(SDNode *N);
 328     SDValue visitCONCAT_VECTORS(SDNode *N);
 329     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 330     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 331     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 332     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 333     SDValue visitMLOAD(SDNode *N);
 334     SDValue visitMSTORE(SDNode *N);
 335     SDValue visitMGATHER(SDNode *N);
 336     SDValue visitMSCATTER(SDNode *N);
 337     SDValue visitFP_TO_FP16(SDNode *N);
 338     SDValue visitFP16_TO_FP(SDNode *N);
 339
 340     SDValue visitFADDForFMACombine(SDNode *N);
 341     SDValue visitFSUBForFMACombine(SDNode *N);
 342     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 343
 344     SDValue XformToShuffleWithZero(SDNode *N);
 345     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
 346                            SDValue RHS);
 347
 348     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 349
 350     SDValue foldSelectOfConstants(SDNode *N);
 351     SDValue foldBinOpIntoSelect(SDNode *BO);
 352     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 353     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
 354     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 355     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 356                              SDValue N2, SDValue N3, ISD::CondCode CC,
 357                              bool NotExtCompare = false);
 358     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 359                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 360     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 361                               const SDLoc &DL);
 362     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 363                           const SDLoc &DL, bool foldBooleans = true);
 364
 365     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 366                            SDValue &CC) const;
 367     bool isOneUseSetCC(SDValue N) const;
 368
 369     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 370                                          unsigned HiOp);
 371     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 372     SDValue CombineExtLoad(SDNode *N);
 373     SDValue combineRepeatedFPDivisors(SDNode *N);
 374     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 375     SDValue BuildSDIV(SDNode *N);
 376     SDValue BuildSDIVPow2(SDNode *N);
 377     SDValue BuildUDIV(SDNode *N);
 378     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
 379     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
 380     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 381     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 382     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 383     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
 384                                 SDNodeFlags Flags, bool Reciprocal);
 385     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
 386                                 SDNodeFlags Flags, bool Reciprocal);
 387     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 388                                bool DemandHighBits = true);
 389     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 390     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 391                               SDValue InnerPos, SDValue InnerNeg,
 392                               unsigned PosOpcode, unsigned NegOpcode,
 393                               const SDLoc &DL);
 394     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 395     SDValue MatchLoadCombine(SDNode *N);
 396     SDValue ReduceLoadWidth(SDNode *N);
 397     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 398     SDValue splitMergedValStore(StoreSDNode *ST);
 399     SDValue TransformFPLoadStorePair(SDNode *N);
 400     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 401     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
 402     SDValue reduceBuildVecToShuffle(SDNode *N);
 403     SDValue reduceBuildVecToTrunc(SDNode *N);
 404     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 405                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 406                                   SDValue VecIn2, unsigned LeftIdx);
 407     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
 408
 409     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
 410
 411     /// Walk up chain skipping non-aliasing memory nodes,
 412     /// looking for aliasing nodes and adding them to the Aliases vector.
 413     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 414                           SmallVectorImpl<SDValue> &Aliases);
 415
 416     /// Return true if there is any possibility that the two addresses overlap.
 417     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
 418
 419     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 420     /// chain (aliasing node.)
 421     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 422
 423     /// Try to replace a store and any possibly adjacent stores on
 424     /// consecutive chains with better chains. Return true only if St is
 425     /// replaced.
 426     ///
 427     /// Notice that other chains may still be replaced even if the function
 428     /// returns false.
 429     bool findBetterNeighborChains(StoreSDNode *St);
 430
 431     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
 432     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
 433
 434     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 435     /// is located in a sequence of memory operations connected by a chain.
 436     struct MemOpLink {
 437       // Ptr to the mem node.
 438       LSBaseSDNode *MemNode;
 439       // Offset from the base ptr.
 440       int64_t OffsetFromBase;
 441
 442       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 443           : MemNode(N), OffsetFromBase(Offset) {}
         };
 444
 445     /// This is a helper function for visitMUL to check the profitability
 446     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 447     /// MulNode is the original multiply, AddNode is (add x, c1),
 448     /// and ConstNode is c2.
 449     bool isMulAddWithConstProfitable(SDNode *MulNode,
 450                                      SDValue &AddNode,
 451                                      SDValue &ConstNode);
 452
 453
 454     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 455     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 456     /// the type of the loaded value to be extended.  LoadedVT returns the type
 457     /// of the original loaded value.  NarrowLoad returns whether the load would
 458     /// need to be narrowed in order to match.
 459     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 460                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
 461                           bool &NarrowLoad);
 462
 463     /// Helper function for MergeConsecutiveStores which merges the
 464     /// component store chains.
 465     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 466                                 unsigned NumStores);
 467
 468     /// This is a helper function for MergeConsecutiveStores. When the source
 469     /// elements of the consecutive stores are all constants or all extracted
 470     /// vector elements, try to merge them into one larger store.
 471     /// \return True if a merged store was created.
 472     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 473                                          EVT MemVT, unsigned NumStores,
 474                                          bool IsConstantSrc, bool UseVector,
 475                                          bool UseTrunc);
 476
 477     /// This is a helper function for MergeConsecutiveStores.
 478     /// Stores that may be merged are placed in StoreNodes.
 479     void getStoreMergeCandidates(StoreSDNode *St,
 480                                  SmallVectorImpl<MemOpLink> &StoreNodes);
 481
 482     /// Helper function for MergeConsecutiveStores. Checks if
 483     /// Candidate stores have indirect dependency through their
 484     /// operands. \return True if safe to merge
 485     bool checkMergeStoreCandidatesForDependencies(
 486         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
 487
 488     /// Merge consecutive store operations into a wide store.
 489     /// This optimization uses wide integers or vectors when possible.
 490     /// \return presumably true if any stores were merged (the declared return
 491     /// type is bool, not a count, and there is no \p StoreNodes parameter).
 492     bool MergeConsecutiveStores(StoreSDNode *N);
 493
 494     /// \brief Try to transform a truncation where C is a constant:
 495     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 496     ///
 497     /// \p N needs to be a truncation and its first operand an AND. Other
 498     /// requirements are checked by the function (e.g. that trunc is
 499     /// single-use) and if missed an empty SDValue is returned.
 500     SDValue distributeTruncateThroughAnd(SDNode *N);
 501
 502   public:
 503     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 504         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 505           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
 506       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
 507
 508       MaximumLegalStoreInBits = 0;
 509       for (MVT VT : MVT::all_valuetypes())
 510         if (EVT(VT).isSimple() && VT != MVT::Other &&
 511             TLI.isTypeLegal(EVT(VT)) &&
 512             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 513           MaximumLegalStoreInBits = VT.getSizeInBits();
 514     }
 515
 516     /// Runs the dag combiner on all nodes in the work list
 517     void Run(CombineLevel AtLevel);
 518
 519     SelectionDAG &getDAG() const { return DAG; }
 520
 521     /// Returns a type large enough to hold any valid shift amount - before type
 522     /// legalization these can be huge.
 523     EVT getShiftAmountTy(EVT LHSTy) {
 524       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 525       // A vector type is returned unchanged.
 526       if (LHSTy.isVector())
 527         return LHSTy;
 528
 529       // Scalars: use the target's scalar shift-amount type when LegalTypes is
 530       // set; otherwise fall back to the pointer type.
           const auto &Layout = DAG.getDataLayout();
           if (LegalTypes)
             return TLI.getScalarShiftAmountTy(Layout, LHSTy);
           return TLI.getPointerTy(Layout);
         }
 531
 532     /// This method returns true if we are running before type legalization or
 533     /// if the specified VT is legal.
 534     bool isTypeLegal(const EVT &VT) {
 535       // While LegalTypes is unset every type is accepted; otherwise defer to
 536       // the target.
 537       return !LegalTypes || TLI.isTypeLegal(VT);
         }
 538
 539     /// Convenience wrapper around TargetLowering::getSetCCResultType
 540     EVT getSetCCResultType(EVT VT) const {
 541       // Gather the data layout and context the target hook expects.
 542       const auto &Layout = DAG.getDataLayout();
           auto &Ctx = *DAG.getContext();
           return TLI.getSetCCResultType(Layout, Ctx, VT);
         }
 543   };
 544 }
 545
 546
 547 namespace {
 548 /// This class is a DAGUpdateListener that removes any deleted
 549 /// nodes from the worklist.
 550 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 551 public:
 552   explicit WorklistRemover(DAGCombiner &dc)
 553     : SelectionDAG::DAGUpdateListener(dc.getDAG()), Combiner(dc) {}
 554
 555   // Drop the deleted node N from the combiner's worklist. E is not used.
 556   void NodeDeleted(SDNode *N, SDNode *E) override {
 557     (void)E;
 558     Combiner.removeFromWorklist(N);
 559   }

     private:
       // Combiner whose worklist is kept in sync with node deletions.
       DAGCombiner &Combiner;
     };
 560 }
 561
 562 //===----------------------------------------------------------------------===//
 563 //  TargetLowering::DAGCombinerInfo implementation
 564 //===----------------------------------------------------------------------===//
 565
 566 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 567   // DC is cast back to the DAGCombiner, which owns the real worklist.
 568   DAGCombiner *Combiner = (DAGCombiner*)DC;
       Combiner->AddToWorklist(N);
     }
 569
 570 SDValue TargetLowering::DAGCombinerInfo::
 571 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 572   // Forward to the DAGCombiner behind DC. To.data() is used rather than
 573   // &To[0] so that an empty list is never indexed out of bounds.
       return ((DAGCombiner*)DC)->CombineTo(N, To.data(), To.size(), AddTo);
     }
 574
 575 SDValue TargetLowering::DAGCombinerInfo::
 576 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 577   // Single-result replacement, delegated to the DAGCombiner behind DC.
 578   DAGCombiner *Combiner = (DAGCombiner*)DC;
       return Combiner->CombineTo(N, Res, AddTo);
     }
 579
 580
 581 SDValue TargetLowering::DAGCombinerInfo::
 582 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 583   // Two-result replacement, delegated to the DAGCombiner behind DC.
 584   DAGCombiner *Combiner = (DAGCombiner*)DC;
       return Combiner->CombineTo(N, Res0, Res1, AddTo);
     }
 585
 586 void TargetLowering::DAGCombinerInfo::
 587 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 588   // Pure delegation to the DAGCombiner behind DC; nothing is returned.
 589   DAGCombiner *Combiner = (DAGCombiner*)DC;
       Combiner->CommitTargetLoweringOpt(TLO);
     }
 590
 591 //===----------------------------------------------------------------------===//
 592 // Helper Functions
 593 //===----------------------------------------------------------------------===//
 594
 595 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 596   removeFromWorklist(N);
 597
 598   // Once N is gone, operands that only N used become dead. Queue them so
 599   // they get revisited and dead nodes are deleted recursively.
 600   for (const SDValue &Operand : N->ops()) {
 601     // An operand that produces several values is queued as well: one of its
 602     // values may become dead, allowing further simplification (e.g. split
 603     // index arithmetic from an indexed load).
 604     const bool WorthRevisiting =
 605         Operand->hasOneUse() || Operand->getNumValues() > 1;
 606     if (WorthRevisiting)
 607       AddToWorklist(Operand.getNode());
 608   }

       DAG.DeleteNode(N);
     }
 609
 610 /// Return 1 if we can compute the negated form of the specified expression for
 611 /// the same cost as the expression itself, or 2 if we can compute the negated
 612 /// form more cheaply than the expression itself.
 613 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 614                                const TargetLowering &TLI,
 615                                const TargetOptions *Options,
 616                                unsigned Depth = 0) {
 617   // fneg is removable even if it has multiple uses.
 618   if (Op.getOpcode() == ISD::FNEG) return 2;
 619
 620   // Don't allow anything with multiple uses.
 621   if (!Op.hasOneUse()) return 0;
 622
 623   // Don't recurse exponentially.
 624   if (Depth > 6) return 0;
 625
 626   switch (Op.getOpcode()) {
 627   default: return false;
 628   case ISD::ConstantFP: {
 629     if (!LegalOperations)
 630       return 1;
 631
 632     // Don't invert constant FP values after legalization unless the target says
 633     // the negated constant is legal.
 634     EVT VT = Op.getValueType();
 635     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 636       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
 637   }
 638   case ISD::FADD:
 639     // FIXME: determine better conditions for this xform.
 640     if (!Options->UnsafeFPMath) return 0;
 641
 642     // After operation legalization, it might not be legal to create new FSUBs.
 643     if (LegalOperations &&
 644         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
 645       return 0;
 646
 647     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 648     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 649                                     Options, Depth + 1))
 650       return V;
 651     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 652     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 653                               Depth + 1);
 654   case ISD::FSUB:
 655     // We can't turn -(A-B) into B-A when we honor signed zeros.
 656     if (!Options->NoSignedZerosFPMath &&
 657         !Op.getNode()->getFlags().hasNoSignedZeros())
 658       return 0;
 659
 660     // fold (fneg (fsub A, B)) -> (fsub B, A)
 661     return 1;
 662
 663   case ISD::FMUL:
 664   case ISD::FDIV:
 665     if (Options->HonorSignDependentRoundingFPMath()) return 0;
 666
 667     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 668     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 669                                     Options, Depth + 1))
 670       return V;
 671
 672     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 673                               Depth + 1);
 674
 675   case ISD::FP_EXTEND:
 676   case ISD::FP_ROUND:
 677   case ISD::FSIN:
 678     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 679                               Depth + 1);
 680   }
 681 }
 682
 683 /// If isNegatibleForFree returns a nonzero value for Op, return the newly
     /// negated expression.
 684 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 685                                     bool LegalOperations, unsigned Depth = 0) {
 686   const TargetOptions &Options = DAG.getTarget().Options;
 687   // fneg is removable even if it has multiple uses.
 688   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
 689
 690   // Don't allow anything with multiple uses.
 691   assert(Op.hasOneUse() && "Unknown reuse!");
 692
 693   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 694
 695   const SDNodeFlags Flags = Op.getNode()->getFlags();
 696
 697   switch (Op.getOpcode()) {
 698   default: llvm_unreachable("Unknown code");
 699   case ISD::ConstantFP: {
 700     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 701     V.changeSign();
 702     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 703   }
 704   case ISD::FADD:
 705     // FIXME: determine better conditions for this xform.
 706     assert(Options.UnsafeFPMath);
 707
 708     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 709     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 710                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 711       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 712                          GetNegatedExpression(Op.getOperand(0), DAG,
 713                                               LegalOperations, Depth+1),
 714                          Op.getOperand(1), Flags);
 715     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 716     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 717                        GetNegatedExpression(Op.getOperand(1), DAG,
 718                                             LegalOperations, Depth+1),
 719                        Op.getOperand(0), Flags);
 720   case ISD::FSUB:
 721     // fold (fneg (fsub 0, B)) -> B
 722     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
 723       if (N0CFP->isZero())
 724         return Op.getOperand(1);
 725
 726     // fold (fneg (fsub A, B)) -> (fsub B, A)
 727     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 728                        Op.getOperand(1), Op.getOperand(0), Flags);
 729
 730   case ISD::FMUL:
 731   case ISD::FDIV:
 732     assert(!Options.HonorSignDependentRoundingFPMath());
 733
 734     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 735     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 736                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 737       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 738                          GetNegatedExpression(Op.getOperand(0), DAG,
 739                                               LegalOperations, Depth+1),
 740                          Op.getOperand(1), Flags);
 741
 742     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 743     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 744                        Op.getOperand(0),
 745                        GetNegatedExpression(Op.getOperand(1), DAG,
 746                                             LegalOperations, Depth+1), Flags);
 747
 748   case ISD::FP_EXTEND:
 749   case ISD::FSIN:
 750     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 751                        GetNegatedExpression(Op.getOperand(0), DAG,
 752                                             LegalOperations, Depth+1));
 753   case ISD::FP_ROUND:
 754       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 755                          GetNegatedExpression(Op.getOperand(0), DAG,
 756                                               LegalOperations, Depth+1),
 757                          Op.getOperand(1));
 758   }
 759 }
 760
 761 // APInts must be the same size for most operations, this helper
 762 // function zero extends the shorter of the pair so that they match.
 763 // We provide an Offset so that we can create bitwidths that won't overflow.
 764 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 765   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 766   LHS = LHS.zextOrSelf(Bits);
 767   RHS = RHS.zextOrSelf(Bits);
 768 }
 769
 770 // Return true if this node is a setcc, or is a select_cc
 771 // that selects between the target values used for true and false, making it
 772 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 773 // the appropriate nodes based on the type of node we are checking. This
 774 // simplifies life a bit for the callers.
 775 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 776                                     SDValue &CC) const {
 777   if (N.getOpcode() == ISD::SETCC) {
 778     LHS = N.getOperand(0);
 779     RHS = N.getOperand(1);
 780     CC  = N.getOperand(2);
 781     return true;
 782   }
 783
 784   if (N.getOpcode() != ISD::SELECT_CC ||
 785       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 786       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 787     return false;
 788
 789   if (TLI.getBooleanContents(N.getValueType()) ==
 790       TargetLowering::UndefinedBooleanContent)
 791     return false;
 792
 793   LHS = N.getOperand(0);
 794   RHS = N.getOperand(1);
 795   CC  = N.getOperand(4);
 796   return true;
 797 }
 798
 799 /// Return true if this is a SetCC-equivalent operation with only one use.
 800 /// If this is true, it allows the users to invert the operation for free when
 801 /// it is profitable to do so.
 802 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 803   SDValue N0, N1, N2;
 804   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 805     return true;
 806   return false;
 807 }
 808
 809 // \brief Returns the SDNode if it is a constant float BuildVector
 810 // or constant float.
 811 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 812   if (isa<ConstantFPSDNode>(N))
 813     return N.getNode();
 814   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 815     return N.getNode();
 816   return nullptr;
 817 }
 818
 819 // Determines if it is a constant integer or a build vector of constant
 820 // integers (and undefs).
 821 // Do not permit build vector implicit truncation.
 822 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 823   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 824     return !(Const->isOpaque() && NoOpaques);
 825   if (N.getOpcode() != ISD::BUILD_VECTOR)
 826     return false;
 827   unsigned BitWidth = N.getScalarValueSizeInBits();
 828   for (const SDValue &Op : N->op_values()) {
 829     if (Op.isUndef())
 830       continue;
 831     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 832     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 833         (Const->isOpaque() && NoOpaques))
 834       return false;
 835   }
 836   return true;
 837 }
 838
 839 // Determines if it is a constant null integer or a splatted vector of a
 840 // constant null integer (with no undefs).
 841 // Build vector implicit truncation is not an issue for null values.
 842 static bool isNullConstantOrNullSplatConstant(SDValue N) {
 843   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 844     return Splat->isNullValue();
 845   return false;
 846 }
 847
 848 // Determines if it is a constant integer of one or a splatted vector of a
 849 // constant integer of one (with no undefs).
 850 // Do not permit build vector implicit truncation.
 851 static bool isOneConstantOrOneSplatConstant(SDValue N) {
 852   unsigned BitWidth = N.getScalarValueSizeInBits();
 853   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 854     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
 855   return false;
 856 }
 857
 858 // Determines if it is a constant integer of all ones or a splatted vector of a
 859 // constant integer of all ones (with no undefs).
 860 // Do not permit build vector implicit truncation.
 861 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
 862   unsigned BitWidth = N.getScalarValueSizeInBits();
 863   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 864     return Splat->isAllOnesValue() &&
 865            Splat->getAPIntValue().getBitWidth() == BitWidth;
 866   return false;
 867 }
 868
 869 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 870 // undef's.
 871 static bool isAnyConstantBuildVector(const SDNode *N) {
 872   return ISD::isBuildVectorOfConstantSDNodes(N) ||
 873          ISD::isBuildVectorOfConstantFPSDNodes(N);
 874 }
 875
 876 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 877                                     SDValue N1) {
 878   EVT VT = N0.getValueType();
 879   if (N0.getOpcode() == Opc) {
 880     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 881       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 882         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
 883         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
 884           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 885         return SDValue();
 886       }
 887       if (N0.hasOneUse()) {
 888         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
 889         // use
 890         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 891         if (!OpNode.getNode())
 892           return SDValue();
 893         AddToWorklist(OpNode.getNode());
 894         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 895       }
 896     }
 897   }
 898
 899   if (N1.getOpcode() == Opc) {
 900     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 901       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 902         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
 903         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
 904           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
 905         return SDValue();
 906       }
 907       if (N1.hasOneUse()) {
 908         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
 909         // use
 910         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
 911         if (!OpNode.getNode())
 912           return SDValue();
 913         AddToWorklist(OpNode.getNode());
 914         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
 915       }
 916     }
 917   }
 918
 919   return SDValue();
 920 }
 921
 922 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 923                                bool AddTo) {
 924   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 925   ++NodesCombined;
 926   DEBUG(dbgs() << "\nReplacing.1 ";
 927         N->dump(&DAG);
 928         dbgs() << "\nWith: ";
 929         To[0].getNode()->dump(&DAG);
 930         dbgs() << " and " << NumTo-1 << " other values\n");
 931   for (unsigned i = 0, e = NumTo; i != e; ++i)
 932     assert((!To[i].getNode() ||
 933             N->getValueType(i) == To[i].getValueType()) &&
 934            "Cannot combine value to value of different type!");
 935
 936   WorklistRemover DeadNodes(*this);
 937   DAG.ReplaceAllUsesWith(N, To);
 938   if (AddTo) {
 939     // Push the new nodes and any users onto the worklist
 940     for (unsigned i = 0, e = NumTo; i != e; ++i) {
 941       if (To[i].getNode()) {
 942         AddToWorklist(To[i].getNode());
 943         AddUsersToWorklist(To[i].getNode());
 944       }
 945     }
 946   }
 947
 948   // Finally, if the node is now dead, remove it from the graph.  The node
 949   // may not be dead if the replacement process recursively simplified to
 950   // something else needing this node.
 951   if (N->use_empty())
 952     deleteAndRecombine(N);
 953   return SDValue(N, 0);
 954 }
 955
 956 void DAGCombiner::
 957 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 958   // Replace all uses.  If any nodes become isomorphic to other nodes and
 959   // are deleted, make sure to remove them from our worklist.
 960   WorklistRemover DeadNodes(*this);
 961   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
 962
 963   // Push the new node and any (possibly new) users onto the worklist.
 964   AddToWorklist(TLO.New.getNode());
 965   AddUsersToWorklist(TLO.New.getNode());
 966
 967   // Finally, if the node is now dead, remove it from the graph.  The node
 968   // may not be dead if the replacement process recursively simplified to
 969   // something else needing this node.
 970   if (TLO.Old.getNode()->use_empty())
 971     deleteAndRecombine(TLO.Old.getNode());
 972 }
 973
 974 /// Check the specified integer node value to see if it can be simplified or if
 975 /// things it uses can be simplified by bit propagation. If so, return true.
 976 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 977   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
 978   KnownBits Known;
 979   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
 980     return false;
 981
 982   // Revisit the node.
 983   AddToWorklist(Op.getNode());
 984
 985   // Replace the old value with the new one.
 986   ++NodesCombined;
 987   DEBUG(dbgs() << "\nReplacing.2 ";
 988         TLO.Old.getNode()->dump(&DAG);
 989         dbgs() << "\nWith: ";
 990         TLO.New.getNode()->dump(&DAG);
 991         dbgs() << '\n');
 992
 993   CommitTargetLoweringOpt(TLO);
 994   return true;
 995 }
 996
 997 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
 998   SDLoc DL(Load);
 999   EVT VT = Load->getValueType(0);
1000   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1001
1002   DEBUG(dbgs() << "\nReplacing.9 ";
1003         Load->dump(&DAG);
1004         dbgs() << "\nWith: ";
1005         Trunc.getNode()->dump(&DAG);
1006         dbgs() << '\n');
1007   WorklistRemover DeadNodes(*this);
1008   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1009   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1010   deleteAndRecombine(Load);
1011   AddToWorklist(Trunc.getNode());
1012 }
1013
1014 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1015   Replace = false;
1016   SDLoc DL(Op);
1017   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1018     LoadSDNode *LD = cast<LoadSDNode>(Op);
1019     EVT MemVT = LD->getMemoryVT();
1020     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1021       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1022                                                        : ISD::EXTLOAD)
1023       : LD->getExtensionType();
1024     Replace = true;
1025     return DAG.getExtLoad(ExtType, DL, PVT,
1026                           LD->getChain(), LD->getBasePtr(),
1027                           MemVT, LD->getMemOperand());
1028   }
1029
1030   unsigned Opc = Op.getOpcode();
1031   switch (Opc) {
1032   default: break;
1033   case ISD::AssertSext:
1034     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1035       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1036     break;
1037   case ISD::AssertZext:
1038     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1039       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1040     break;
1041   case ISD::Constant: {
1042     unsigned ExtOpc =
1043       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1044     return DAG.getNode(ExtOpc, DL, PVT, Op);
1045   }
1046   }
1047
1048   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1049     return SDValue();
1050   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1051 }
1052
1053 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1054   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1055     return SDValue();
1056   EVT OldVT = Op.getValueType();
1057   SDLoc DL(Op);
1058   bool Replace = false;
1059   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1060   if (!NewOp.getNode())
1061     return SDValue();
1062   AddToWorklist(NewOp.getNode());
1063
1064   if (Replace)
1065     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1066   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1067                      DAG.getValueType(OldVT));
1068 }
1069
1070 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1071   EVT OldVT = Op.getValueType();
1072   SDLoc DL(Op);
1073   bool Replace = false;
1074   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1075   if (!NewOp.getNode())
1076     return SDValue();
1077   AddToWorklist(NewOp.getNode());
1078
1079   if (Replace)
1080     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1081   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1082 }
1083
1084 /// Promote the specified integer binary operation if the target indicates it is
1085 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1086 /// i32 since i16 instructions are longer.
1087 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1088   if (!LegalOperations)
1089     return SDValue();
1090
1091   EVT VT = Op.getValueType();
1092   if (VT.isVector() || !VT.isInteger())
1093     return SDValue();
1094
1095   // If operation type is 'undesirable', e.g. i16 on x86, consider
1096   // promoting it.
1097   unsigned Opc = Op.getOpcode();
1098   if (TLI.isTypeDesirableForOp(Opc, VT))
1099     return SDValue();
1100
1101   EVT PVT = VT;
1102   // Consult target whether it is a good idea to promote this operation and
1103   // what's the right type to promote it to.
1104   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1105     assert(PVT != VT && "Don't know what type to promote to!");
1106
1107     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1108
1109     bool Replace0 = false;
1110     SDValue N0 = Op.getOperand(0);
1111     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1112
1113     bool Replace1 = false;
1114     SDValue N1 = Op.getOperand(1);
1115     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1116     SDLoc DL(Op);
1117
1118     SDValue RV =
1119         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1120
1121     // New replace instances of N0 and N1
1122     if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
1123         NN0.getOpcode() != ISD::DELETED_NODE) {
1124       AddToWorklist(NN0.getNode());
1125       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1126     }
1127
1128     if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
1129         NN1.getOpcode() != ISD::DELETED_NODE) {
1130       AddToWorklist(NN1.getNode());
1131       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1132     }
1133
1134     // Deal with Op being deleted.
1135     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1136       return RV;
1137   }
1138   return SDValue();
1139 }
1140
1141 /// Promote the specified integer shift operation if the target indicates it is
1142 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1143 /// i32 since i16 instructions are longer.
1144 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1145   if (!LegalOperations)
1146     return SDValue();
1147
1148   EVT VT = Op.getValueType();
1149   if (VT.isVector() || !VT.isInteger())
1150     return SDValue();
1151
1152   // If operation type is 'undesirable', e.g. i16 on x86, consider
1153   // promoting it.
1154   unsigned Opc = Op.getOpcode();
1155   if (TLI.isTypeDesirableForOp(Opc, VT))
1156     return SDValue();
1157
1158   EVT PVT = VT;
1159   // Consult target whether it is a good idea to promote this operation and
1160   // what's the right type to promote it to.
1161   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1162     assert(PVT != VT && "Don't know what type to promote to!");
1163
1164     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1165
1166     bool Replace = false;
1167     SDValue N0 = Op.getOperand(0);
1168     SDValue N1 = Op.getOperand(1);
1169     if (Opc == ISD::SRA)
1170       N0 = SExtPromoteOperand(N0, PVT);
1171     else if (Opc == ISD::SRL)
1172       N0 = ZExtPromoteOperand(N0, PVT);
1173     else
1174       N0 = PromoteOperand(N0, PVT, Replace);
1175
1176     if (!N0.getNode())
1177       return SDValue();
1178
1179     SDLoc DL(Op);
1180     SDValue RV =
1181         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1182
1183     AddToWorklist(N0.getNode());
1184     if (Replace)
1185       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1186
1187     // Deal with Op being deleted.
1188     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1189       return RV;
1190   }
1191   return SDValue();
1192 }
1193
1194 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1195   if (!LegalOperations)
1196     return SDValue();
1197
1198   EVT VT = Op.getValueType();
1199   if (VT.isVector() || !VT.isInteger())
1200     return SDValue();
1201
1202   // If operation type is 'undesirable', e.g. i16 on x86, consider
1203   // promoting it.
1204   unsigned Opc = Op.getOpcode();
1205   if (TLI.isTypeDesirableForOp(Opc, VT))
1206     return SDValue();
1207
1208   EVT PVT = VT;
1209   // Consult target whether it is a good idea to promote this operation and
1210   // what's the right type to promote it to.
1211   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1212     assert(PVT != VT && "Don't know what type to promote to!");
1213     // fold (aext (aext x)) -> (aext x)
1214     // fold (aext (zext x)) -> (zext x)
1215     // fold (aext (sext x)) -> (sext x)
1216     DEBUG(dbgs() << "\nPromoting ";
1217           Op.getNode()->dump(&DAG));
1218     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1219   }
1220   return SDValue();
1221 }
1222
1223 bool DAGCombiner::PromoteLoad(SDValue Op) {
1224   if (!LegalOperations)
1225     return false;
1226
1227   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1228     return false;
1229
1230   EVT VT = Op.getValueType();
1231   if (VT.isVector() || !VT.isInteger())
1232     return false;
1233
1234   // If operation type is 'undesirable', e.g. i16 on x86, consider
1235   // promoting it.
1236   unsigned Opc = Op.getOpcode();
1237   if (TLI.isTypeDesirableForOp(Opc, VT))
1238     return false;
1239
1240   EVT PVT = VT;
1241   // Consult target whether it is a good idea to promote this operation and
1242   // what's the right type to promote it to.
1243   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1244     assert(PVT != VT && "Don't know what type to promote to!");
1245
1246     SDLoc DL(Op);
1247     SDNode *N = Op.getNode();
1248     LoadSDNode *LD = cast<LoadSDNode>(N);
1249     EVT MemVT = LD->getMemoryVT();
1250     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1251       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1252                                                        : ISD::EXTLOAD)
1253       : LD->getExtensionType();
1254     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1255                                    LD->getChain(), LD->getBasePtr(),
1256                                    MemVT, LD->getMemOperand());
1257     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1258
1259     DEBUG(dbgs() << "\nPromoting ";
1260           N->dump(&DAG);
1261           dbgs() << "\nTo: ";
1262           Result.getNode()->dump(&DAG);
1263           dbgs() << '\n');
1264     WorklistRemover DeadNodes(*this);
1265     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1266     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1267     deleteAndRecombine(N);
1268     AddToWorklist(Result.getNode());
1269     return true;
1270   }
1271   return false;
1272 }
1273
1274 /// \brief Recursively delete a node which has no uses and any operands for
1275 /// which it is the only use.
1276 ///
1277 /// Note that this both deletes the nodes and removes them from the worklist.
1278 /// It also adds any nodes who have had a user deleted to the worklist as they
1279 /// may now have only one use and subject to other combines.
1280 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1281   if (!N->use_empty())
1282     return false;
1283
1284   SmallSetVector<SDNode *, 16> Nodes;
1285   Nodes.insert(N);
1286   do {
1287     N = Nodes.pop_back_val();
1288     if (!N)
1289       continue;
1290
1291     if (N->use_empty()) {
1292       for (const SDValue &ChildN : N->op_values())
1293         Nodes.insert(ChildN.getNode());
1294
1295       removeFromWorklist(N);
1296       DAG.DeleteNode(N);
1297     } else {
1298       AddToWorklist(N);
1299     }
1300   } while (!Nodes.empty());
1301   return true;
1302 }
1303
1304 //===----------------------------------------------------------------------===//
1305 //  Main DAG Combiner implementation
1306 //===----------------------------------------------------------------------===//
1307
1308 void DAGCombiner::Run(CombineLevel AtLevel) {
1309   // set the instance variables, so that the various visit routines may use it.
1310   Level = AtLevel;
1311   LegalOperations = Level >= AfterLegalizeVectorOps;
1312   LegalTypes = Level >= AfterLegalizeTypes;
1313
1314   // Add all the dag nodes to the worklist.
1315   for (SDNode &Node : DAG.allnodes())
1316     AddToWorklist(&Node);
1317
1318   // Create a dummy node (which is not added to allnodes), that adds a reference
1319   // to the root node, preventing it from being deleted, and tracking any
1320   // changes of the root.
1321   HandleSDNode Dummy(DAG.getRoot());
1322
1323   // While the worklist isn't empty, find a node and try to combine it.
1324   while (!WorklistMap.empty()) {
1325     SDNode *N;
1326     // The Worklist holds the SDNodes in order, but it may contain null entries.
1327     do {
1328       N = Worklist.pop_back_val();
1329     } while (!N);
1330
1331     bool GoodWorklistEntry = WorklistMap.erase(N);
1332     (void)GoodWorklistEntry;
1333     assert(GoodWorklistEntry &&
1334            "Found a worklist entry without a corresponding map entry!");
1335
1336     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1337     // N is deleted from the DAG, since they too may now be dead or may have a
1338     // reduced number of uses, allowing other xforms.
1339     if (recursivelyDeleteUnusedNodes(N))
1340       continue;
1341
1342     WorklistRemover DeadNodes(*this);
1343
1344     // If this combine is running after legalizing the DAG, re-legalize any
1345     // nodes pulled off the worklist.
1346     if (Level == AfterLegalizeDAG) {
1347       SmallSetVector<SDNode *, 16> UpdatedNodes;
1348       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1349
1350       for (SDNode *LN : UpdatedNodes) {
1351         AddToWorklist(LN);
1352         AddUsersToWorklist(LN);
1353       }
1354       if (!NIsValid)
1355         continue;
1356     }
1357
1358     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1359
1360     // Add any operands of the new node which have not yet been combined to the
1361     // worklist as well. Because the worklist uniques things already, this
1362     // won't repeatedly process the same operand.
1363     CombinedNodes.insert(N);
1364     for (const SDValue &ChildN : N->op_values())
1365       if (!CombinedNodes.count(ChildN.getNode()))
1366         AddToWorklist(ChildN.getNode());
1367
1368     SDValue RV = combine(N);
1369
1370     if (!RV.getNode())
1371       continue;
1372
1373     ++NodesCombined;
1374
1375     // If we get back the same node we passed in, rather than a new node or
1376     // zero, we know that the node must have defined multiple values and
1377     // CombineTo was used.  Since CombineTo takes care of the worklist
1378     // mechanics for us, we have no work to do in this case.
1379     if (RV.getNode() == N)
1380       continue;
1381
1382     assert(N->getOpcode() != ISD::DELETED_NODE &&
1383            RV.getOpcode() != ISD::DELETED_NODE &&
1384            "Node was deleted but visit returned new node!");
1385
1386     DEBUG(dbgs() << " ... into: ";
1387           RV.getNode()->dump(&DAG));
1388
1389     if (N->getNumValues() == RV.getNode()->getNumValues())
1390       DAG.ReplaceAllUsesWith(N, RV.getNode());
1391     else {
1392       assert(N->getValueType(0) == RV.getValueType() &&
1393              N->getNumValues() == 1 && "Type mismatch");
1394       DAG.ReplaceAllUsesWith(N, &RV);
1395     }
1396
1397     // Push the new node and any users onto the worklist
1398     AddToWorklist(RV.getNode());
1399     AddUsersToWorklist(RV.getNode());
1400
1401     // Finally, if the node is now dead, remove it from the graph.  The node
1402     // may not be dead if the replacement process recursively simplified to
1403     // something else needing this node. This will also take care of adding any
1404     // operands which have lost a user to the worklist.
1405     recursivelyDeleteUnusedNodes(N);
1406   }
1407
1408   // If the root changed (e.g. it was a dead load, update the root).
1409   DAG.setRoot(Dummy.getValue());
1410   DAG.RemoveDeadNodes();
1411 }
1412
1413 SDValue DAGCombiner::visit(SDNode *N) {
1414   switch (N->getOpcode()) {
1415   default: break;
1416   case ISD::TokenFactor:        return visitTokenFactor(N);
1417   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1418   case ISD::ADD:                return visitADD(N);
1419   case ISD::SUB:                return visitSUB(N);
1420   case ISD::ADDC:               return visitADDC(N);
1421   case ISD::UADDO:              return visitUADDO(N);
1422   case ISD::SUBC:               return visitSUBC(N);
1423   case ISD::USUBO:              return visitUSUBO(N);
1424   case ISD::ADDE:               return visitADDE(N);
1425   case ISD::ADDCARRY:           return visitADDCARRY(N);
1426   case ISD::SUBE:               return visitSUBE(N);
1427   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1428   case ISD::MUL:                return visitMUL(N);
1429   case ISD::SDIV:               return visitSDIV(N);
1430   case ISD::UDIV:               return visitUDIV(N);
1431   case ISD::SREM:
1432   case ISD::UREM:               return visitREM(N);
1433   case ISD::MULHU:              return visitMULHU(N);
1434   case ISD::MULHS:              return visitMULHS(N);
1435   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1436   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1437   case ISD::SMULO:              return visitSMULO(N);
1438   case ISD::UMULO:              return visitUMULO(N);
1439   case ISD::SMIN:
1440   case ISD::SMAX:
1441   case ISD::UMIN:
1442   case ISD::UMAX:               return visitIMINMAX(N);
1443   case ISD::AND:                return visitAND(N);
1444   case ISD::OR:                 return visitOR(N);
1445   case ISD::XOR:                return visitXOR(N);
1446   case ISD::SHL:                return visitSHL(N);
1447   case ISD::SRA:                return visitSRA(N);
1448   case ISD::SRL:                return visitSRL(N);
1449   case ISD::ROTR:
1450   case ISD::ROTL:               return visitRotate(N);
1451   case ISD::ABS:                return visitABS(N);
1452   case ISD::BSWAP:              return visitBSWAP(N);
1453   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1454   case ISD::CTLZ:               return visitCTLZ(N);
1455   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1456   case ISD::CTTZ:               return visitCTTZ(N);
1457   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1458   case ISD::CTPOP:              return visitCTPOP(N);
1459   case ISD::SELECT:             return visitSELECT(N);
1460   case ISD::VSELECT:            return visitVSELECT(N);
1461   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1462   case ISD::SETCC:              return visitSETCC(N);
1463   case ISD::SETCCE:             return visitSETCCE(N);
1464   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1465   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1466   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1467   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1468   case ISD::AssertZext:         return visitAssertZext(N);
1469   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1470   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1471   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1472   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1473   case ISD::BITCAST:            return visitBITCAST(N);
1474   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1475   case ISD::FADD:               return visitFADD(N);
1476   case ISD::FSUB:               return visitFSUB(N);
1477   case ISD::FMUL:               return visitFMUL(N);
1478   case ISD::FMA:                return visitFMA(N);
1479   case ISD::FDIV:               return visitFDIV(N);
1480   case ISD::FREM:               return visitFREM(N);
1481   case ISD::FSQRT:              return visitFSQRT(N);
1482   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1483   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1484   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1485   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1486   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1487   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1488   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1489   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1490   case ISD::FNEG:               return visitFNEG(N);
1491   case ISD::FABS:               return visitFABS(N);
1492   case ISD::FFLOOR:             return visitFFLOOR(N);
1493   case ISD::FMINNUM:            return visitFMINNUM(N);
1494   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1495   case ISD::FCEIL:              return visitFCEIL(N);
1496   case ISD::FTRUNC:             return visitFTRUNC(N);
1497   case ISD::BRCOND:             return visitBRCOND(N);
1498   case ISD::BR_CC:              return visitBR_CC(N);
1499   case ISD::LOAD:               return visitLOAD(N);
1500   case ISD::STORE:              return visitSTORE(N);
1501   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1502   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1503   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1504   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1505   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1506   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1507   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1508   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1509   case ISD::MGATHER:            return visitMGATHER(N);
1510   case ISD::MLOAD:              return visitMLOAD(N);
1511   case ISD::MSCATTER:           return visitMSCATTER(N);
1512   case ISD::MSTORE:             return visitMSTORE(N);
1513   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1514   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1515   }
1516   return SDValue();
1517 }
1518
1519 SDValue DAGCombiner::combine(SDNode *N) {
1520   SDValue RV = visit(N);
1521
1522   // If nothing happened, try a target-specific DAG combine.
1523   if (!RV.getNode()) {
1524     assert(N->getOpcode() != ISD::DELETED_NODE &&
1525            "Node was deleted but visit returned NULL!");
1526
1527     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1528         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1529
1530       // Expose the DAG combiner to the target combiner impls.
1531       TargetLowering::DAGCombinerInfo
1532         DagCombineInfo(DAG, Level, false, this);
1533
1534       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1535     }
1536   }
1537
1538   // If nothing happened still, try promoting the operation.
1539   if (!RV.getNode()) {
1540     switch (N->getOpcode()) {
1541     default: break;
1542     case ISD::ADD:
1543     case ISD::SUB:
1544     case ISD::MUL:
1545     case ISD::AND:
1546     case ISD::OR:
1547     case ISD::XOR:
1548       RV = PromoteIntBinOp(SDValue(N, 0));
1549       break;
1550     case ISD::SHL:
1551     case ISD::SRA:
1552     case ISD::SRL:
1553       RV = PromoteIntShiftOp(SDValue(N, 0));
1554       break;
1555     case ISD::SIGN_EXTEND:
1556     case ISD::ZERO_EXTEND:
1557     case ISD::ANY_EXTEND:
1558       RV = PromoteExtend(SDValue(N, 0));
1559       break;
1560     case ISD::LOAD:
1561       if (PromoteLoad(SDValue(N, 0)))
1562         RV = SDValue(N, 0);
1563       break;
1564     }
1565   }
1566
1567   // If N is a commutative binary node, try commuting it to enable more
1568   // sdisel CSE.
1569   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1570       N->getNumValues() == 1) {
1571     SDValue N0 = N->getOperand(0);
1572     SDValue N1 = N->getOperand(1);
1573
1574     // Constant operands are canonicalized to RHS.
1575     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1576       SDValue Ops[] = {N1, N0};
1577       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1578                                             N->getFlags());
1579       if (CSENode)
1580         return SDValue(CSENode, 0);
1581     }
1582   }
1583
1584   return RV;
1585 }
1586
1587 /// Given a node, return its input chain if it has one, otherwise return a null
1588 /// sd operand.
1589 static SDValue getInputChainForNode(SDNode *N) {
1590   if (unsigned NumOps = N->getNumOperands()) {
1591     if (N->getOperand(0).getValueType() == MVT::Other)
1592       return N->getOperand(0);
1593     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1594       return N->getOperand(NumOps-1);
1595     for (unsigned i = 1; i < NumOps-1; ++i)
1596       if (N->getOperand(i).getValueType() == MVT::Other)
1597         return N->getOperand(i);
1598   }
1599   return SDValue();
1600 }
1601
1602 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1603   // If N has two operands, where one has an input chain equal to the other,
1604   // the 'other' chain is redundant.
1605   if (N->getNumOperands() == 2) {
1606     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1607       return N->getOperand(0);
1608     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1609       return N->getOperand(1);
1610   }
1611
1612   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1613   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1614   SmallPtrSet<SDNode*, 16> SeenOps;
1615   bool Changed = false;             // If we should replace this token factor.
1616
1617   // Start out with this token factor.
1618   TFs.push_back(N);
1619
1620   // Iterate through token factors.  The TFs grows when new token factors are
1621   // encountered.
1622   for (unsigned i = 0; i < TFs.size(); ++i) {
1623     SDNode *TF = TFs[i];
1624
1625     // Check each of the operands.
1626     for (const SDValue &Op : TF->op_values()) {
1627
1628       switch (Op.getOpcode()) {
1629       case ISD::EntryToken:
1630         // Entry tokens don't need to be added to the list. They are
1631         // redundant.
1632         Changed = true;
1633         break;
1634
1635       case ISD::TokenFactor:
1636         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1637           // Queue up for processing.
1638           TFs.push_back(Op.getNode());
1639           // Clean up in case the token factor is removed.
1640           AddToWorklist(Op.getNode());
1641           Changed = true;
1642           break;
1643         }
1644         LLVM_FALLTHROUGH;
1645
1646       default:
1647         // Only add if it isn't already in the list.
1648         if (SeenOps.insert(Op.getNode()).second)
1649           Ops.push_back(Op);
1650         else
1651           Changed = true;
1652         break;
1653       }
1654     }
1655   }
1656
1657   // Remove Nodes that are chained to another node in the list. Do so
1658   // by walking up chains breath-first stopping when we've seen
1659   // another operand. In general we must climb to the EntryNode, but we can exit
1660   // early if we find all remaining work is associated with just one operand as
1661   // no further pruning is possible.
1662
1663   // List of nodes to search through and original Ops from which they originate.
1664   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1665   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1666   SmallPtrSet<SDNode *, 16> SeenChains;
1667   bool DidPruneOps = false;
1668
1669   unsigned NumLeftToConsider = 0;
1670   for (const SDValue &Op : Ops) {
1671     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1672     OpWorkCount.push_back(1);
1673   }
1674
1675   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1676     // If this is an Op, we can remove the op from the list. Remark any
1677     // search associated with it as from the current OpNumber.
1678     if (SeenOps.count(Op) != 0) {
1679       Changed = true;
1680       DidPruneOps = true;
1681       unsigned OrigOpNumber = 0;
1682       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1683         OrigOpNumber++;
1684       assert((OrigOpNumber != Ops.size()) &&
1685              "expected to find TokenFactor Operand");
1686       // Re-mark worklist from OrigOpNumber to OpNumber
1687       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1688         if (Worklist[i].second == OrigOpNumber) {
1689           Worklist[i].second = OpNumber;
1690         }
1691       }
1692       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1693       OpWorkCount[OrigOpNumber] = 0;
1694       NumLeftToConsider--;
1695     }
1696     // Add if it's a new chain
1697     if (SeenChains.insert(Op).second) {
1698       OpWorkCount[OpNumber]++;
1699       Worklist.push_back(std::make_pair(Op, OpNumber));
1700     }
1701   };
1702
1703   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1704     // We need at least be consider at least 2 Ops to prune.
1705     if (NumLeftToConsider <= 1)
1706       break;
1707     auto CurNode = Worklist[i].first;
1708     auto CurOpNumber = Worklist[i].second;
1709     assert((OpWorkCount[CurOpNumber] > 0) &&
1710            "Node should not appear in worklist");
1711     switch (CurNode->getOpcode()) {
1712     case ISD::EntryToken:
1713       // Hitting EntryToken is the only way for the search to terminate without
1714       // hitting
1715       // another operand's search. Prevent us from marking this operand
1716       // considered.
1717       NumLeftToConsider++;
1718       break;
1719     case ISD::TokenFactor:
1720       for (const SDValue &Op : CurNode->op_values())
1721         AddToWorklist(i, Op.getNode(), CurOpNumber);
1722       break;
1723     case ISD::CopyFromReg:
1724     case ISD::CopyToReg:
1725       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1726       break;
1727     default:
1728       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1729         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1730       break;
1731     }
1732     OpWorkCount[CurOpNumber]--;
1733     if (OpWorkCount[CurOpNumber] == 0)
1734       NumLeftToConsider--;
1735   }
1736
1737   // If we've changed things around then replace token factor.
1738   if (Changed) {
1739     SDValue Result;
1740     if (Ops.empty()) {
1741       // The entry token is the only possible outcome.
1742       Result = DAG.getEntryNode();
1743     } else {
1744       if (DidPruneOps) {
1745         SmallVector<SDValue, 8> PrunedOps;
1746         //
1747         for (const SDValue &Op : Ops) {
1748           if (SeenChains.count(Op.getNode()) == 0)
1749             PrunedOps.push_back(Op);
1750         }
1751         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1752       } else {
1753         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1754       }
1755     }
1756     return Result;
1757   }
1758   return SDValue();
1759 }
1760
1761 /// MERGE_VALUES can always be eliminated.
1762 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1763   WorklistRemover DeadNodes(*this);
1764   // Replacing results may cause a different MERGE_VALUES to suddenly
1765   // be CSE'd with N, and carry its uses with it. Iterate until no
1766   // uses remain, to ensure that the node can be safely deleted.
1767   // First add the users of this node to the work list so that they
1768   // can be tried again once they have new operands.
1769   AddUsersToWorklist(N);
1770   do {
1771     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1772       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1773   } while (!N->use_empty());
1774   deleteAndRecombine(N);
1775   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1776 }
1777
1778 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1779 /// ConstantSDNode pointer else nullptr.
1780 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1781   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1782   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1783 }
1784
1785 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1786   auto BinOpcode = BO->getOpcode();
1787   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1788           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1789           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1790           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1791           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1792           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1793           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1794           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1795           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1796          "Unexpected binary operator");
1797
1798   // Bail out if any constants are opaque because we can't constant fold those.
1799   SDValue C1 = BO->getOperand(1);
1800   if (!isConstantOrConstantVector(C1, true) &&
1801       !isConstantFPBuildVectorOrConstantFP(C1))
1802     return SDValue();
1803
1804   // Don't do this unless the old select is going away. We want to eliminate the
1805   // binary operator, not replace a binop with a select.
1806   // TODO: Handle ISD::SELECT_CC.
1807   SDValue Sel = BO->getOperand(0);
1808   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1809     return SDValue();
1810
1811   SDValue CT = Sel.getOperand(1);
1812   if (!isConstantOrConstantVector(CT, true) &&
1813       !isConstantFPBuildVectorOrConstantFP(CT))
1814     return SDValue();
1815
1816   SDValue CF = Sel.getOperand(2);
1817   if (!isConstantOrConstantVector(CF, true) &&
1818       !isConstantFPBuildVectorOrConstantFP(CF))
1819     return SDValue();
1820
1821   // We have a select-of-constants followed by a binary operator with a
1822   // constant. Eliminate the binop by pulling the constant math into the select.
1823   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1824   EVT VT = Sel.getValueType();
1825   SDLoc DL(Sel);
1826   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1827   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1828           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1829          "Failed to constant fold a binop with constant operands");
1830
1831   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1832   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1833           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1834          "Failed to constant fold a binop with constant operands");
1835
1836   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1837 }
1838
1839 SDValue DAGCombiner::visitADD(SDNode *N) {
1840   SDValue N0 = N->getOperand(0);
1841   SDValue N1 = N->getOperand(1);
1842   EVT VT = N0.getValueType();
1843   SDLoc DL(N);
1844
1845   // fold vector ops
1846   if (VT.isVector()) {
1847     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1848       return FoldedVOp;
1849
1850     // fold (add x, 0) -> x, vector edition
1851     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1852       return N0;
1853     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1854       return N1;
1855   }
1856
1857   // fold (add x, undef) -> undef
1858   if (N0.isUndef())
1859     return N0;
1860
1861   if (N1.isUndef())
1862     return N1;
1863
1864   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1865     // canonicalize constant to RHS
1866     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1867       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1868     // fold (add c1, c2) -> c1+c2
1869     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1870                                       N1.getNode());
1871   }
1872
1873   // fold (add x, 0) -> x
1874   if (isNullConstant(N1))
1875     return N0;
1876
1877   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1878     // fold ((c1-A)+c2) -> (c1+c2)-A
1879     if (N0.getOpcode() == ISD::SUB &&
1880         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1881       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1882       return DAG.getNode(ISD::SUB, DL, VT,
1883                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1884                          N0.getOperand(1));
1885     }
1886
1887     // add (sext i1 X), 1 -> zext (not i1 X)
1888     // We don't transform this pattern:
1889     //   add (zext i1 X), -1 -> sext (not i1 X)
1890     // because most (?) targets generate better code for the zext form.
1891     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1892         isOneConstantOrOneSplatConstant(N1)) {
1893       SDValue X = N0.getOperand(0);
1894       if ((!LegalOperations ||
1895            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1896             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1897           X.getScalarValueSizeInBits() == 1) {
1898         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1899         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1900       }
1901     }
1902   }
1903
1904   if (SDValue NewSel = foldBinOpIntoSelect(N))
1905     return NewSel;
1906
1907   // reassociate add
1908   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1909     return RADD;
1910
1911   // fold ((0-A) + B) -> B-A
1912   if (N0.getOpcode() == ISD::SUB &&
1913       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1914     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1915
1916   // fold (A + (0-B)) -> A-B
1917   if (N1.getOpcode() == ISD::SUB &&
1918       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1919     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1920
1921   // fold (A+(B-A)) -> B
1922   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1923     return N1.getOperand(0);
1924
1925   // fold ((B-A)+A) -> B
1926   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1927     return N0.getOperand(0);
1928
1929   // fold (A+(B-(A+C))) to (B-C)
1930   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1931       N0 == N1.getOperand(1).getOperand(0))
1932     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1933                        N1.getOperand(1).getOperand(1));
1934
1935   // fold (A+(B-(C+A))) to (B-C)
1936   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1937       N0 == N1.getOperand(1).getOperand(1))
1938     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1939                        N1.getOperand(1).getOperand(0));
1940
1941   // fold (A+((B-A)+or-C)) to (B+or-C)
1942   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1943       N1.getOperand(0).getOpcode() == ISD::SUB &&
1944       N0 == N1.getOperand(0).getOperand(1))
1945     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1946                        N1.getOperand(1));
1947
1948   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1949   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1950     SDValue N00 = N0.getOperand(0);
1951     SDValue N01 = N0.getOperand(1);
1952     SDValue N10 = N1.getOperand(0);
1953     SDValue N11 = N1.getOperand(1);
1954
1955     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
1956       return DAG.getNode(ISD::SUB, DL, VT,
1957                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1958                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1959   }
1960
1961   if (SimplifyDemandedBits(SDValue(N, 0)))
1962     return SDValue(N, 0);
1963
1964   // fold (a+b) -> (a|b) iff a and b share no bits.
1965   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
1966       DAG.haveNoCommonBitsSet(N0, N1))
1967     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
1968
1969   if (SDValue Combined = visitADDLike(N0, N1, N))
1970     return Combined;
1971
1972   if (SDValue Combined = visitADDLike(N1, N0, N))
1973     return Combined;
1974
1975   return SDValue();
1976 }
1977
1978 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
1979   bool Masked = false;
1980
1981   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
1982   while (true) {
1983     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
1984       V = V.getOperand(0);
1985       continue;
1986     }
1987
1988     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
1989       Masked = true;
1990       V = V.getOperand(0);
1991       continue;
1992     }
1993
1994     break;
1995   }
1996
1997   // If this is not a carry, return.
1998   if (V.getResNo() != 1)
1999     return SDValue();
2000
2001   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2002       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2003     return SDValue();
2004
2005   // If the result is masked, then no matter what kind of bool it is we can
2006   // return. If it isn't, then we need to make sure the bool type is either 0 or
2007   // 1 and not other values.
2008   if (Masked ||
2009       TLI.getBooleanContents(V.getValueType()) ==
2010           TargetLoweringBase::ZeroOrOneBooleanContent)
2011     return V;
2012
2013   return SDValue();
2014 }
2015
2016 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2017   EVT VT = N0.getValueType();
2018   SDLoc DL(LocReference);
2019
2020   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2021   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2022       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2023     return DAG.getNode(ISD::SUB, DL, VT, N0,
2024                        DAG.getNode(ISD::SHL, DL, VT,
2025                                    N1.getOperand(0).getOperand(1),
2026                                    N1.getOperand(1)));
2027
2028   if (N1.getOpcode() == ISD::AND) {
2029     SDValue AndOp0 = N1.getOperand(0);
2030     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2031     unsigned DestBits = VT.getScalarSizeInBits();
2032
2033     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2034     // and similar xforms where the inner op is either ~0 or 0.
2035     if (NumSignBits == DestBits &&
2036         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2037       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2038   }
2039
2040   // add (sext i1), X -> sub X, (zext i1)
2041   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2042       N0.getOperand(0).getValueType() == MVT::i1 &&
2043       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2044     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2045     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2046   }
2047
2048   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2049   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2050     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2051     if (TN->getVT() == MVT::i1) {
2052       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2053                                  DAG.getConstant(1, DL, VT));
2054       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2055     }
2056   }
2057
2058   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2059   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
2060     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2061                        N0, N1.getOperand(0), N1.getOperand(2));
2062
2063   // (add X, Carry) -> (addcarry X, 0, Carry)
2064   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2065     if (SDValue Carry = getAsCarry(TLI, N1))
2066       return DAG.getNode(ISD::ADDCARRY, DL,
2067                          DAG.getVTList(VT, Carry.getValueType()), N0,
2068                          DAG.getConstant(0, DL, VT), Carry);
2069
2070   return SDValue();
2071 }
2072
2073 SDValue DAGCombiner::visitADDC(SDNode *N) {
2074   SDValue N0 = N->getOperand(0);
2075   SDValue N1 = N->getOperand(1);
2076   EVT VT = N0.getValueType();
2077   SDLoc DL(N);
2078
2079   // If the flag result is dead, turn this into an ADD.
2080   if (!N->hasAnyUseOfValue(1))
2081     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2082                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2083
2084   // canonicalize constant to RHS.
2085   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2086   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2087   if (N0C && !N1C)
2088     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2089
2090   // fold (addc x, 0) -> x + no carry out
2091   if (isNullConstant(N1))
2092     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2093                                         DL, MVT::Glue));
2094
2095   // If it cannot overflow, transform into an add.
2096   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2097     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2098                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2099
2100   return SDValue();
2101 }
2102
2103 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2104   SDValue N0 = N->getOperand(0);
2105   SDValue N1 = N->getOperand(1);
2106   EVT VT = N0.getValueType();
2107   if (VT.isVector())
2108     return SDValue();
2109
2110   EVT CarryVT = N->getValueType(1);
2111   SDLoc DL(N);
2112
2113   // If the flag result is dead, turn this into an ADD.
2114   if (!N->hasAnyUseOfValue(1))
2115     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2116                      DAG.getUNDEF(CarryVT));
2117
2118   // canonicalize constant to RHS.
2119   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2120   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2121   if (N0C && !N1C)
2122     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2123
2124   // fold (uaddo x, 0) -> x + no carry out
2125   if (isNullConstant(N1))
2126     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2127
2128   // If it cannot overflow, transform into an add.
2129   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2130     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2131                      DAG.getConstant(0, DL, CarryVT));
2132
2133   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2134     return Combined;
2135
2136   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2137     return Combined;
2138
2139   return SDValue();
2140 }
2141
2142 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2143   auto VT = N0.getValueType();
2144
2145   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2146   // If Y + 1 cannot overflow.
2147   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2148     SDValue Y = N1.getOperand(0);
2149     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2150     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2151       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2152                          N1.getOperand(2));
2153   }
2154
2155   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2156   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2157     if (SDValue Carry = getAsCarry(TLI, N1))
2158       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2159                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2160
2161   return SDValue();
2162 }
2163
2164 SDValue DAGCombiner::visitADDE(SDNode *N) {
2165   SDValue N0 = N->getOperand(0);
2166   SDValue N1 = N->getOperand(1);
2167   SDValue CarryIn = N->getOperand(2);
2168
2169   // canonicalize constant to RHS
2170   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2171   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2172   if (N0C && !N1C)
2173     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2174                        N1, N0, CarryIn);
2175
2176   // fold (adde x, y, false) -> (addc x, y)
2177   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2178     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2179
2180   return SDValue();
2181 }
2182
2183 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2184   SDValue N0 = N->getOperand(0);
2185   SDValue N1 = N->getOperand(1);
2186   SDValue CarryIn = N->getOperand(2);
2187   SDLoc DL(N);
2188
2189   // canonicalize constant to RHS
2190   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2191   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2192   if (N0C && !N1C)
2193     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2194
2195   // fold (addcarry x, y, false) -> (uaddo x, y)
2196   if (isNullConstant(CarryIn))
2197     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2198
2199   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2200   if (isNullConstant(N0) && isNullConstant(N1)) {
2201     EVT VT = N0.getValueType();
2202     EVT CarryVT = CarryIn.getValueType();
2203     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2204     AddToWorklist(CarryExt.getNode());
2205     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2206                                     DAG.getConstant(1, DL, VT)),
2207                      DAG.getConstant(0, DL, CarryVT));
2208   }
2209
2210   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2211     return Combined;
2212
2213   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2214     return Combined;
2215
2216   return SDValue();
2217 }
2218
2219 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2220                                        SDNode *N) {
2221   // Iff the flag result is dead:
2222   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2223   if ((N0.getOpcode() == ISD::ADD ||
2224        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2225       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2226     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2227                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2228
2229   /**
2230    * When one of the addcarry argument is itself a carry, we may be facing
2231    * a diamond carry propagation. In which case we try to transform the DAG
2232    * to ensure linear carry propagation if that is possible.
2233    *
2234    * We are trying to get:
2235    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2236    */
2237   if (auto Y = getAsCarry(TLI, N1)) {
2238     /**
2239      *            (uaddo A, B)
2240      *             /       \
2241      *          Carry      Sum
2242      *            |          \
2243      *            | (addcarry *, 0, Z)
2244      *            |       /
2245      *             \   Carry
2246      *              |   /
2247      * (addcarry X, *, *)
2248      */
2249     if (Y.getOpcode() == ISD::UADDO &&
2250         CarryIn.getResNo() == 1 &&
2251         CarryIn.getOpcode() == ISD::ADDCARRY &&
2252         isNullConstant(CarryIn.getOperand(1)) &&
2253         CarryIn.getOperand(0) == Y.getValue(0)) {
2254       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2255                               Y.getOperand(0), Y.getOperand(1),
2256                               CarryIn.getOperand(2));
2257       AddToWorklist(NewY.getNode());
2258       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2259                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2260                          NewY.getValue(1));
2261     }
2262   }
2263
2264   return SDValue();
2265 }
2266
2267 // Since it may not be valid to emit a fold to zero for vector initializers
2268 // check if we can before folding.
2269 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2270                              SelectionDAG &DAG, bool LegalOperations,
2271                              bool LegalTypes) {
2272   if (!VT.isVector())
2273     return DAG.getConstant(0, DL, VT);
2274   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2275     return DAG.getConstant(0, DL, VT);
2276   return SDValue();
2277 }
2278
2279 SDValue DAGCombiner::visitSUB(SDNode *N) {
2280   SDValue N0 = N->getOperand(0);
2281   SDValue N1 = N->getOperand(1);
2282   EVT VT = N0.getValueType();
2283   SDLoc DL(N);
2284
2285   // fold vector ops
2286   if (VT.isVector()) {
2287     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2288       return FoldedVOp;
2289
2290     // fold (sub x, 0) -> x, vector edition
2291     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2292       return N0;
2293   }
2294
2295   // fold (sub x, x) -> 0
2296   // FIXME: Refactor this and xor and other similar operations together.
2297   if (N0 == N1)
2298     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2299   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2300       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2301     // fold (sub c1, c2) -> c1-c2
2302     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2303                                       N1.getNode());
2304   }
2305
2306   if (SDValue NewSel = foldBinOpIntoSelect(N))
2307     return NewSel;
2308
2309   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2310
2311   // fold (sub x, c) -> (add x, -c)
2312   if (N1C) {
2313     return DAG.getNode(ISD::ADD, DL, VT, N0,
2314                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2315   }
2316
2317   if (isNullConstantOrNullSplatConstant(N0)) {
2318     unsigned BitWidth = VT.getScalarSizeInBits();
2319     // Right-shifting everything out but the sign bit followed by negation is
2320     // the same as flipping arithmetic/logical shift type without the negation:
2321     // -(X >>u 31) -> (X >>s 31)
2322     // -(X >>s 31) -> (X >>u 31)
2323     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2324       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2325       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2326         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2327         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2328           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2329       }
2330     }
2331
2332     // 0 - X --> 0 if the sub is NUW.
2333     if (N->getFlags().hasNoUnsignedWrap())
2334       return N0;
2335
2336     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2337       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2338       // N1 must be 0 because negating the minimum signed value is undefined.
2339       if (N->getFlags().hasNoSignedWrap())
2340         return N0;
2341
2342       // 0 - X --> X if X is 0 or the minimum signed value.
2343       return N1;
2344     }
2345   }
2346
2347   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2348   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2349     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2350
2351   // fold A-(A-B) -> B
2352   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2353     return N1.getOperand(1);
2354
2355   // fold (A+B)-A -> B
2356   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2357     return N0.getOperand(1);
2358
2359   // fold (A+B)-B -> A
2360   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2361     return N0.getOperand(0);
2362
2363   // fold C2-(A+C1) -> (C2-C1)-A
2364   if (N1.getOpcode() == ISD::ADD) {
2365     SDValue N11 = N1.getOperand(1);
2366     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2367         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2368       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2369       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2370     }
2371   }
2372
2373   // fold ((A+(B+or-C))-B) -> A+or-C
2374   if (N0.getOpcode() == ISD::ADD &&
2375       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2376        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2377       N0.getOperand(1).getOperand(0) == N1)
2378     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2379                        N0.getOperand(1).getOperand(1));
2380
2381   // fold ((A+(C+B))-B) -> A+C
2382   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2383       N0.getOperand(1).getOperand(1) == N1)
2384     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2385                        N0.getOperand(1).getOperand(0));
2386
2387   // fold ((A-(B-C))-C) -> A-B
2388   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2389       N0.getOperand(1).getOperand(1) == N1)
2390     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2391                        N0.getOperand(1).getOperand(0));
2392
2393   // If either operand of a sub is undef, the result is undef
2394   if (N0.isUndef())
2395     return N0;
2396   if (N1.isUndef())
2397     return N1;
2398
2399   // If the relocation model supports it, consider symbol offsets.
2400   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2401     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2402       // fold (sub Sym, c) -> Sym-c
2403       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2404         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2405                                     GA->getOffset() -
2406                                         (uint64_t)N1C->getSExtValue());
2407       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2408       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2409         if (GA->getGlobal() == GB->getGlobal())
2410           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2411                                  DL, VT);
2412     }
2413
2414   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2415   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2416     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2417     if (TN->getVT() == MVT::i1) {
2418       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2419                                  DAG.getConstant(1, DL, VT));
2420       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2421     }
2422   }
2423
2424   return SDValue();
2425 }
2426
2427 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2428   SDValue N0 = N->getOperand(0);
2429   SDValue N1 = N->getOperand(1);
2430   EVT VT = N0.getValueType();
2431   SDLoc DL(N);
2432
2433   // If the flag result is dead, turn this into an SUB.
2434   if (!N->hasAnyUseOfValue(1))
2435     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2436                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2437
2438   // fold (subc x, x) -> 0 + no borrow
2439   if (N0 == N1)
2440     return CombineTo(N, DAG.getConstant(0, DL, VT),
2441                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2442
2443   // fold (subc x, 0) -> x + no borrow
2444   if (isNullConstant(N1))
2445     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2446
2447   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2448   if (isAllOnesConstant(N0))
2449     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2450                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2451
2452   return SDValue();
2453 }
2454
2455 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2456   SDValue N0 = N->getOperand(0);
2457   SDValue N1 = N->getOperand(1);
2458   EVT VT = N0.getValueType();
2459   if (VT.isVector())
2460     return SDValue();
2461
2462   EVT CarryVT = N->getValueType(1);
2463   SDLoc DL(N);
2464
2465   // If the flag result is dead, turn this into an SUB.
2466   if (!N->hasAnyUseOfValue(1))
2467     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2468                      DAG.getUNDEF(CarryVT));
2469
2470   // fold (usubo x, x) -> 0 + no borrow
2471   if (N0 == N1)
2472     return CombineTo(N, DAG.getConstant(0, DL, VT),
2473                      DAG.getConstant(0, DL, CarryVT));
2474
2475   // fold (usubo x, 0) -> x + no borrow
2476   if (isNullConstant(N1))
2477     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2478
2479   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2480   if (isAllOnesConstant(N0))
2481     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2482                      DAG.getConstant(0, DL, CarryVT));
2483
2484   return SDValue();
2485 }
2486
2487 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2488   SDValue N0 = N->getOperand(0);
2489   SDValue N1 = N->getOperand(1);
2490   SDValue CarryIn = N->getOperand(2);
2491
2492   // fold (sube x, y, false) -> (subc x, y)
2493   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2494     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2495
2496   return SDValue();
2497 }
2498
2499 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2500   SDValue N0 = N->getOperand(0);
2501   SDValue N1 = N->getOperand(1);
2502   SDValue CarryIn = N->getOperand(2);
2503
2504   // fold (subcarry x, y, false) -> (usubo x, y)
2505   if (isNullConstant(CarryIn))
2506     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2507
2508   return SDValue();
2509 }
2510
2511 SDValue DAGCombiner::visitMUL(SDNode *N) {
2512   SDValue N0 = N->getOperand(0);
2513   SDValue N1 = N->getOperand(1);
2514   EVT VT = N0.getValueType();
2515
2516   // fold (mul x, undef) -> 0
2517   if (N0.isUndef() || N1.isUndef())
2518     return DAG.getConstant(0, SDLoc(N), VT);
2519
2520   bool N0IsConst = false;
2521   bool N1IsConst = false;
2522   bool N1IsOpaqueConst = false;
2523   bool N0IsOpaqueConst = false;
2524   APInt ConstValue0, ConstValue1;
2525   // fold vector ops
2526   if (VT.isVector()) {
2527     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2528       return FoldedVOp;
2529
2530     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2531     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2532   } else {
2533     N0IsConst = isa<ConstantSDNode>(N0);
2534     if (N0IsConst) {
2535       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2536       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2537     }
2538     N1IsConst = isa<ConstantSDNode>(N1);
2539     if (N1IsConst) {
2540       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2541       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2542     }
2543   }
2544
2545   // fold (mul c1, c2) -> c1*c2
2546   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2547     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2548                                       N0.getNode(), N1.getNode());
2549
2550   // canonicalize constant to RHS (vector doesn't have to splat)
2551   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2552      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2553     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2554   // fold (mul x, 0) -> 0
2555   if (N1IsConst && ConstValue1.isNullValue())
2556     return N1;
2557   // We require a splat of the entire scalar bit width for non-contiguous
2558   // bit patterns.
2559   bool IsFullSplat =
2560     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2561   // fold (mul x, 1) -> x
2562   if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
2563     return N0;
2564
2565   if (SDValue NewSel = foldBinOpIntoSelect(N))
2566     return NewSel;
2567
2568   // fold (mul x, -1) -> 0-x
2569   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2570     SDLoc DL(N);
2571     return DAG.getNode(ISD::SUB, DL, VT,
2572                        DAG.getConstant(0, DL, VT), N0);
2573   }
2574   // fold (mul x, (1 << c)) -> x << c
2575   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2576       IsFullSplat) {
2577     SDLoc DL(N);
2578     return DAG.getNode(ISD::SHL, DL, VT, N0,
2579                        DAG.getConstant(ConstValue1.logBase2(), DL,
2580                                        getShiftAmountTy(N0.getValueType())));
2581   }
2582   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2583   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2584       IsFullSplat) {
2585     unsigned Log2Val = (-ConstValue1).logBase2();
2586     SDLoc DL(N);
2587     // FIXME: If the input is something that is easily negated (e.g. a
2588     // single-use add), we should put the negate there.
2589     return DAG.getNode(ISD::SUB, DL, VT,
2590                        DAG.getConstant(0, DL, VT),
2591                        DAG.getNode(ISD::SHL, DL, VT, N0,
2592                             DAG.getConstant(Log2Val, DL,
2593                                       getShiftAmountTy(N0.getValueType()))));
2594   }
2595
2596   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2597   if (N0.getOpcode() == ISD::SHL &&
2598       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2599       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2600     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2601     if (isConstantOrConstantVector(C3))
2602       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2603   }
2604
2605   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2606   // use.
2607   {
2608     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2609
2610     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2611     if (N0.getOpcode() == ISD::SHL &&
2612         isConstantOrConstantVector(N0.getOperand(1)) &&
2613         N0.getNode()->hasOneUse()) {
2614       Sh = N0; Y = N1;
2615     } else if (N1.getOpcode() == ISD::SHL &&
2616                isConstantOrConstantVector(N1.getOperand(1)) &&
2617                N1.getNode()->hasOneUse()) {
2618       Sh = N1; Y = N0;
2619     }
2620
2621     if (Sh.getNode()) {
2622       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2623       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2624     }
2625   }
2626
2627   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2628   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2629       N0.getOpcode() == ISD::ADD &&
2630       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2631       isMulAddWithConstProfitable(N, N0, N1))
2632       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2633                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2634                                      N0.getOperand(0), N1),
2635                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2636                                      N0.getOperand(1), N1));
2637
2638   // reassociate mul
2639   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2640     return RMUL;
2641
2642   return SDValue();
2643 }
2644
2645 /// Return true if divmod libcall is available.
2646 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2647                                      const TargetLowering &TLI) {
2648   RTLIB::Libcall LC;
2649   EVT NodeType = Node->getValueType(0);
2650   if (!NodeType.isSimple())
2651     return false;
2652   switch (NodeType.getSimpleVT().SimpleTy) {
2653   default: return false; // No libcall for vector types.
2654   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2655   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2656   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2657   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2658   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2659   }
2660
2661   return TLI.getLibcallName(LC) != nullptr;
2662 }
2663
2664 /// Issue divrem if both quotient and remainder are needed.
2665 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2666   if (Node->use_empty())
2667     return SDValue(); // This is a dead node, leave it alone.
2668
2669   unsigned Opcode = Node->getOpcode();
2670   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2671   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2672
2673   // DivMod lib calls can still work on non-legal types if using lib-calls.
2674   EVT VT = Node->getValueType(0);
2675   if (VT.isVector() || !VT.isInteger())
2676     return SDValue();
2677
2678   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2679     return SDValue();
2680
2681   // If DIVREM is going to get expanded into a libcall,
2682   // but there is no libcall available, then don't combine.
2683   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2684       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2685     return SDValue();
2686
2687   // If div is legal, it's better to do the normal expansion
2688   unsigned OtherOpcode = 0;
2689   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2690     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2691     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2692       return SDValue();
2693   } else {
2694     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2695     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2696       return SDValue();
2697   }
2698
2699   SDValue Op0 = Node->getOperand(0);
2700   SDValue Op1 = Node->getOperand(1);
2701   SDValue combined;
2702   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2703          UE = Op0.getNode()->use_end(); UI != UE;) {
2704     SDNode *User = *UI++;
2705     if (User == Node || User->use_empty())
2706       continue;
2707     // Convert the other matching node(s), too;
2708     // otherwise, the DIVREM may get target-legalized into something
2709     // target-specific that we won't be able to recognize.
2710     unsigned UserOpc = User->getOpcode();
2711     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2712         User->getOperand(0) == Op0 &&
2713         User->getOperand(1) == Op1) {
2714       if (!combined) {
2715         if (UserOpc == OtherOpcode) {
2716           SDVTList VTs = DAG.getVTList(VT, VT);
2717           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2718         } else if (UserOpc == DivRemOpc) {
2719           combined = SDValue(User, 0);
2720         } else {
2721           assert(UserOpc == Opcode);
2722           continue;
2723         }
2724       }
2725       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2726         CombineTo(User, combined);
2727       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2728         CombineTo(User, combined.getValue(1));
2729     }
2730   }
2731   return combined;
2732 }
2733
2734 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2735   SDValue N0 = N->getOperand(0);
2736   SDValue N1 = N->getOperand(1);
2737   EVT VT = N->getValueType(0);
2738   SDLoc DL(N);
2739
2740   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2741     return DAG.getUNDEF(VT);
2742
2743   // undef / X -> 0
2744   // undef % X -> 0
2745   if (N0.isUndef())
2746     return DAG.getConstant(0, DL, VT);
2747
2748   return SDValue();
2749 }
2750
2751 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2752   SDValue N0 = N->getOperand(0);
2753   SDValue N1 = N->getOperand(1);
2754   EVT VT = N->getValueType(0);
2755
2756   // fold vector ops
2757   if (VT.isVector())
2758     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2759       return FoldedVOp;
2760
2761   SDLoc DL(N);
2762
2763   // fold (sdiv c1, c2) -> c1/c2
2764   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2765   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2766   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2767     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2768   // fold (sdiv X, 1) -> X
2769   if (N1C && N1C->isOne())
2770     return N0;
2771   // fold (sdiv X, -1) -> 0-X
2772   if (N1C && N1C->isAllOnesValue())
2773     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2774
2775   if (SDValue V = simplifyDivRem(N, DAG))
2776     return V;
2777
2778   if (SDValue NewSel = foldBinOpIntoSelect(N))
2779     return NewSel;
2780
2781   // If we know the sign bits of both operands are zero, strength reduce to a
2782   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2783   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2784     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2785
2786   // fold (sdiv X, pow2) -> simple ops after legalize
2787   // FIXME: We check for the exact bit here because the generic lowering gives
2788   // better results in that case. The target-specific lowering should learn how
2789   // to handle exact sdivs efficiently.
2790   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2791       !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2792                                     (-N1C->getAPIntValue()).isPowerOf2())) {
2793     // Target-specific implementation of sdiv x, pow2.
2794     if (SDValue Res = BuildSDIVPow2(N))
2795       return Res;
2796
2797     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2798
2799     // Splat the sign bit into the register
2800     SDValue SGN =
2801         DAG.getNode(ISD::SRA, DL, VT, N0,
2802                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2803                                     getShiftAmountTy(N0.getValueType())));
2804     AddToWorklist(SGN.getNode());
2805
2806     // Add (N0 < 0) ? abs2 - 1 : 0;
2807     SDValue SRL =
2808         DAG.getNode(ISD::SRL, DL, VT, SGN,
2809                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2810                                     getShiftAmountTy(SGN.getValueType())));
2811     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2812     AddToWorklist(SRL.getNode());
2813     AddToWorklist(ADD.getNode());    // Divide by pow2
2814     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2815                   DAG.getConstant(lg2, DL,
2816                                   getShiftAmountTy(ADD.getValueType())));
2817
2818     // If we're dividing by a positive value, we're done.  Otherwise, we must
2819     // negate the result.
2820     if (N1C->getAPIntValue().isNonNegative())
2821       return SRA;
2822
2823     AddToWorklist(SRA.getNode());
2824     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2825   }
2826
2827   // If integer divide is expensive and we satisfy the requirements, emit an
2828   // alternate sequence.  Targets may check function attributes for size/speed
2829   // trade-offs.
2830   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2831   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2832     if (SDValue Op = BuildSDIV(N))
2833       return Op;
2834
2835   // sdiv, srem -> sdivrem
2836   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2837   // true.  Otherwise, we break the simplification logic in visitREM().
2838   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2839     if (SDValue DivRem = useDivRem(N))
2840         return DivRem;
2841
2842   return SDValue();
2843 }
2844
2845 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2846   SDValue N0 = N->getOperand(0);
2847   SDValue N1 = N->getOperand(1);
2848   EVT VT = N->getValueType(0);
2849
2850   // fold vector ops
2851   if (VT.isVector())
2852     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2853       return FoldedVOp;
2854
2855   SDLoc DL(N);
2856
2857   // fold (udiv c1, c2) -> c1/c2
2858   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2859   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2860   if (N0C && N1C)
2861     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2862                                                     N0C, N1C))
2863       return Folded;
2864
2865   if (SDValue V = simplifyDivRem(N, DAG))
2866     return V;
2867
2868   if (SDValue NewSel = foldBinOpIntoSelect(N))
2869     return NewSel;
2870
2871   // fold (udiv x, (1 << c)) -> x >>u c
2872   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2873       DAG.isKnownToBeAPowerOfTwo(N1)) {
2874     SDValue LogBase2 = BuildLogBase2(N1, DL);
2875     AddToWorklist(LogBase2.getNode());
2876
2877     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2878     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2879     AddToWorklist(Trunc.getNode());
2880     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2881   }
2882
2883   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2884   if (N1.getOpcode() == ISD::SHL) {
2885     SDValue N10 = N1.getOperand(0);
2886     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2887         DAG.isKnownToBeAPowerOfTwo(N10)) {
2888       SDValue LogBase2 = BuildLogBase2(N10, DL);
2889       AddToWorklist(LogBase2.getNode());
2890
2891       EVT ADDVT = N1.getOperand(1).getValueType();
2892       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2893       AddToWorklist(Trunc.getNode());
2894       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2895       AddToWorklist(Add.getNode());
2896       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2897     }
2898   }
2899
2900   // fold (udiv x, c) -> alternate
2901   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2902   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2903     if (SDValue Op = BuildUDIV(N))
2904       return Op;
2905
2906   // sdiv, srem -> sdivrem
2907   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2908   // true.  Otherwise, we break the simplification logic in visitREM().
2909   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2910     if (SDValue DivRem = useDivRem(N))
2911         return DivRem;
2912
2913   return SDValue();
2914 }
2915
2916 // handles ISD::SREM and ISD::UREM
2917 SDValue DAGCombiner::visitREM(SDNode *N) {
2918   unsigned Opcode = N->getOpcode();
2919   SDValue N0 = N->getOperand(0);
2920   SDValue N1 = N->getOperand(1);
2921   EVT VT = N->getValueType(0);
2922   bool isSigned = (Opcode == ISD::SREM);
2923   SDLoc DL(N);
2924
2925   // fold (rem c1, c2) -> c1%c2
2926   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2927   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2928   if (N0C && N1C)
2929     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2930       return Folded;
2931
2932   if (SDValue V = simplifyDivRem(N, DAG))
2933     return V;
2934
2935   if (SDValue NewSel = foldBinOpIntoSelect(N))
2936     return NewSel;
2937
2938   if (isSigned) {
2939     // If we know the sign bits of both operands are zero, strength reduce to a
2940     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2941     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2942       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2943   } else {
2944     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
2945     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
2946       // fold (urem x, pow2) -> (and x, pow2-1)
2947       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2948       AddToWorklist(Add.getNode());
2949       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2950     }
2951     if (N1.getOpcode() == ISD::SHL &&
2952         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
2953       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2954       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2955       AddToWorklist(Add.getNode());
2956       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2957     }
2958   }
2959
2960   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2961
2962   // If X/C can be simplified by the division-by-constant logic, lower
2963   // X%C to the equivalent of X-X/C*C.
2964   // To avoid mangling nodes, this simplification requires that the combine()
2965   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
2966   // against this by skipping the simplification if isIntDivCheap().  When
2967   // div is not cheap, combine will not return a DIVREM.  Regardless,
2968   // checking cheapness here makes sense since the simplification results in
2969   // fatter code.
2970   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2971     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2972     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2973     AddToWorklist(Div.getNode());
2974     SDValue OptimizedDiv = combine(Div.getNode());
2975     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2976       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2977              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2978       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2979       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2980       AddToWorklist(Mul.getNode());
2981       return Sub;
2982     }
2983   }
2984
2985   // sdiv, srem -> sdivrem
2986   if (SDValue DivRem = useDivRem(N))
2987     return DivRem.getValue(1);
2988
2989   return SDValue();
2990 }
2991
2992 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2993   SDValue N0 = N->getOperand(0);
2994   SDValue N1 = N->getOperand(1);
2995   EVT VT = N->getValueType(0);
2996   SDLoc DL(N);
2997
2998   // fold (mulhs x, 0) -> 0
2999   if (isNullConstant(N1))
3000     return N1;
3001   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3002   if (isOneConstant(N1)) {
3003     SDLoc DL(N);
3004     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3005                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3006                                        getShiftAmountTy(N0.getValueType())));
3007   }
3008   // fold (mulhs x, undef) -> 0
3009   if (N0.isUndef() || N1.isUndef())
3010     return DAG.getConstant(0, SDLoc(N), VT);
3011
3012   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3013   // plus a shift.
3014   if (VT.isSimple() && !VT.isVector()) {
3015     MVT Simple = VT.getSimpleVT();
3016     unsigned SimpleSize = Simple.getSizeInBits();
3017     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3018     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3019       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3020       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3021       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3022       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3023             DAG.getConstant(SimpleSize, DL,
3024                             getShiftAmountTy(N1.getValueType())));
3025       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3026     }
3027   }
3028
3029   return SDValue();
3030 }
3031
3032 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3033   SDValue N0 = N->getOperand(0);
3034   SDValue N1 = N->getOperand(1);
3035   EVT VT = N->getValueType(0);
3036   SDLoc DL(N);
3037
3038   // fold (mulhu x, 0) -> 0
3039   if (isNullConstant(N1))
3040     return N1;
3041   // fold (mulhu x, 1) -> 0
3042   if (isOneConstant(N1))
3043     return DAG.getConstant(0, DL, N0.getValueType());
3044   // fold (mulhu x, undef) -> 0
3045   if (N0.isUndef() || N1.isUndef())
3046     return DAG.getConstant(0, DL, VT);
3047
3048   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3049   // plus a shift.
3050   if (VT.isSimple() && !VT.isVector()) {
3051     MVT Simple = VT.getSimpleVT();
3052     unsigned SimpleSize = Simple.getSizeInBits();
3053     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3054     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3055       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3056       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3057       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3058       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3059             DAG.getConstant(SimpleSize, DL,
3060                             getShiftAmountTy(N1.getValueType())));
3061       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3062     }
3063   }
3064
3065   return SDValue();
3066 }
3067
3068 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3069 /// give the opcodes for the two computations that are being performed. Return
3070 /// true if a simplification was made.
3071 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3072                                                 unsigned HiOp) {
3073   // If the high half is not needed, just compute the low half.
3074   bool HiExists = N->hasAnyUseOfValue(1);
3075   if (!HiExists &&
3076       (!LegalOperations ||
3077        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3078     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3079     return CombineTo(N, Res, Res);
3080   }
3081
3082   // If the low half is not needed, just compute the high half.
3083   bool LoExists = N->hasAnyUseOfValue(0);
3084   if (!LoExists &&
3085       (!LegalOperations ||
3086        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3087     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3088     return CombineTo(N, Res, Res);
3089   }
3090
3091   // If both halves are used, return as it is.
3092   if (LoExists && HiExists)
3093     return SDValue();
3094
3095   // If the two computed results can be simplified separately, separate them.
3096   if (LoExists) {
3097     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3098     AddToWorklist(Lo.getNode());
3099     SDValue LoOpt = combine(Lo.getNode());
3100     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3101         (!LegalOperations ||
3102          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3103       return CombineTo(N, LoOpt, LoOpt);
3104   }
3105
3106   if (HiExists) {
3107     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3108     AddToWorklist(Hi.getNode());
3109     SDValue HiOpt = combine(Hi.getNode());
3110     if (HiOpt.getNode() && HiOpt != Hi &&
3111         (!LegalOperations ||
3112          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3113       return CombineTo(N, HiOpt, HiOpt);
3114   }
3115
3116   return SDValue();
3117 }
3118
3119 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3120   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3121     return Res;
3122
3123   EVT VT = N->getValueType(0);
3124   SDLoc DL(N);
3125
3126   // If the type is twice as wide is legal, transform the mulhu to a wider
3127   // multiply plus a shift.
3128   if (VT.isSimple() && !VT.isVector()) {
3129     MVT Simple = VT.getSimpleVT();
3130     unsigned SimpleSize = Simple.getSizeInBits();
3131     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3132     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3133       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3134       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3135       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3136       // Compute the high part as N1.
3137       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3138             DAG.getConstant(SimpleSize, DL,
3139                             getShiftAmountTy(Lo.getValueType())));
3140       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3141       // Compute the low part as N0.
3142       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3143       return CombineTo(N, Lo, Hi);
3144     }
3145   }
3146
3147   return SDValue();
3148 }
3149
3150 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3151   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3152     return Res;
3153
3154   EVT VT = N->getValueType(0);
3155   SDLoc DL(N);
3156
3157   // If the type is twice as wide is legal, transform the mulhu to a wider
3158   // multiply plus a shift.
3159   if (VT.isSimple() && !VT.isVector()) {
3160     MVT Simple = VT.getSimpleVT();
3161     unsigned SimpleSize = Simple.getSizeInBits();
3162     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3163     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3164       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3165       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3166       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3167       // Compute the high part as N1.
3168       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3169             DAG.getConstant(SimpleSize, DL,
3170                             getShiftAmountTy(Lo.getValueType())));
3171       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3172       // Compute the low part as N0.
3173       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3174       return CombineTo(N, Lo, Hi);
3175     }
3176   }
3177
3178   return SDValue();
3179 }
3180
3181 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3182   // (smulo x, 2) -> (saddo x, x)
3183   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3184     if (C2->getAPIntValue() == 2)
3185       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3186                          N->getOperand(0), N->getOperand(0));
3187
3188   return SDValue();
3189 }
3190
3191 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3192   // (umulo x, 2) -> (uaddo x, x)
3193   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3194     if (C2->getAPIntValue() == 2)
3195       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3196                          N->getOperand(0), N->getOperand(0));
3197
3198   return SDValue();
3199 }
3200
3201 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3202   SDValue N0 = N->getOperand(0);
3203   SDValue N1 = N->getOperand(1);
3204   EVT VT = N0.getValueType();
3205
3206   // fold vector ops
3207   if (VT.isVector())
3208     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3209       return FoldedVOp;
3210
3211   // fold (add c1, c2) -> c1+c2
3212   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3213   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3214   if (N0C && N1C)
3215     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3216
3217   // canonicalize constant to RHS
3218   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3219      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3220     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3221
3222   return SDValue();
3223 }
3224
3225 /// If this is a binary operator with two operands of the same opcode, try to
3226 /// simplify it.
3227 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3228   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3229   EVT VT = N0.getValueType();
3230   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3231
3232   // Bail early if none of these transforms apply.
3233   if (N0.getNumOperands() == 0) return SDValue();
3234
3235   // For each of OP in AND/OR/XOR:
3236   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3237   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3238   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3239   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3240   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3241   //
3242   // do not sink logical op inside of a vector extend, since it may combine
3243   // into a vsetcc.
3244   EVT Op0VT = N0.getOperand(0).getValueType();
3245   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3246        N0.getOpcode() == ISD::SIGN_EXTEND ||
3247        N0.getOpcode() == ISD::BSWAP ||
3248        // Avoid infinite looping with PromoteIntBinOp.
3249        (N0.getOpcode() == ISD::ANY_EXTEND &&
3250         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3251        (N0.getOpcode() == ISD::TRUNCATE &&
3252         (!TLI.isZExtFree(VT, Op0VT) ||
3253          !TLI.isTruncateFree(Op0VT, VT)) &&
3254         TLI.isTypeLegal(Op0VT))) &&
3255       !VT.isVector() &&
3256       Op0VT == N1.getOperand(0).getValueType() &&
3257       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3258     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3259                                  N0.getOperand(0).getValueType(),
3260                                  N0.getOperand(0), N1.getOperand(0));
3261     AddToWorklist(ORNode.getNode());
3262     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3263   }
3264
3265   // For each of OP in SHL/SRL/SRA/AND...
3266   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3267   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3268   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3269   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3270        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3271       N0.getOperand(1) == N1.getOperand(1)) {
3272     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3273                                  N0.getOperand(0).getValueType(),
3274                                  N0.getOperand(0), N1.getOperand(0));
3275     AddToWorklist(ORNode.getNode());
3276     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3277                        ORNode, N0.getOperand(1));
3278   }
3279
3280   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3281   // Only perform this optimization up until type legalization, before
3282   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3283   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3284   // we don't want to undo this promotion.
3285   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3286   // on scalars.
3287   if ((N0.getOpcode() == ISD::BITCAST ||
3288        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3289        Level <= AfterLegalizeTypes) {
3290     SDValue In0 = N0.getOperand(0);
3291     SDValue In1 = N1.getOperand(0);
3292     EVT In0Ty = In0.getValueType();
3293     EVT In1Ty = In1.getValueType();
3294     SDLoc DL(N);
3295     // If both incoming values are integers, and the original types are the
3296     // same.
3297     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3298       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3299       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3300       AddToWorklist(Op.getNode());
3301       return BC;
3302     }
3303   }
3304
3305   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3306   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3307   // If both shuffles use the same mask, and both shuffle within a single
3308   // vector, then it is worthwhile to move the swizzle after the operation.
3309   // The type-legalizer generates this pattern when loading illegal
3310   // vector types from memory. In many cases this allows additional shuffle
3311   // optimizations.
3312   // There are other cases where moving the shuffle after the xor/and/or
3313   // is profitable even if shuffles don't perform a swizzle.
3314   // If both shuffles use the same mask, and both shuffles have the same first
3315   // or second operand, then it might still be profitable to move the shuffle
3316   // after the xor/and/or operation.
3317   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3318     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3319     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3320
3321     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3322            "Inputs to shuffles are not the same type");
3323
3324     // Check that both shuffles use the same mask. The masks are known to be of
3325     // the same length because the result vector type is the same.
3326     // Check also that shuffles have only one use to avoid introducing extra
3327     // instructions.
3328     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3329         SVN0->getMask().equals(SVN1->getMask())) {
3330       SDValue ShOp = N0->getOperand(1);
3331
3332       // Don't try to fold this node if it requires introducing a
3333       // build vector of all zeros that might be illegal at this stage.
3334       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3335         if (!LegalTypes)
3336           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3337         else
3338           ShOp = SDValue();
3339       }
3340
3341       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3342       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
3343       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3344       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3345         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3346                                       N0->getOperand(0), N1->getOperand(0));
3347         AddToWorklist(NewNode.getNode());
3348         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3349                                     SVN0->getMask());
3350       }
3351
3352       // Don't try to fold this node if it requires introducing a
3353       // build vector of all zeros that might be illegal at this stage.
3354       ShOp = N0->getOperand(0);
3355       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3356         if (!LegalTypes)
3357           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3358         else
3359           ShOp = SDValue();
3360       }
3361
3362       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3363       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
3364       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3365       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3366         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3367                                       N0->getOperand(1), N1->getOperand(1));
3368         AddToWorklist(NewNode.getNode());
3369         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3370                                     SVN0->getMask());
3371       }
3372     }
3373   }
3374
3375   return SDValue();
3376 }
3377
3378 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3379 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3380                                        const SDLoc &DL) {
3381   SDValue LL, LR, RL, RR, N0CC, N1CC;
3382   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3383       !isSetCCEquivalent(N1, RL, RR, N1CC))
3384     return SDValue();
3385
3386   assert(N0.getValueType() == N1.getValueType() &&
3387          "Unexpected operand types for bitwise logic op");
3388   assert(LL.getValueType() == LR.getValueType() &&
3389          RL.getValueType() == RR.getValueType() &&
3390          "Unexpected operand types for setcc");
3391
3392   // If we're here post-legalization or the logic op type is not i1, the logic
3393   // op type must match a setcc result type. Also, all folds require new
3394   // operations on the left and right operands, so those types must match.
3395   EVT VT = N0.getValueType();
3396   EVT OpVT = LL.getValueType();
3397   if (LegalOperations || VT != MVT::i1)
3398     if (VT != getSetCCResultType(OpVT))
3399       return SDValue();
3400   if (OpVT != RL.getValueType())
3401     return SDValue();
3402
3403   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3404   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3405   bool IsInteger = OpVT.isInteger();
3406   if (LR == RR && CC0 == CC1 && IsInteger) {
3407     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3408     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3409
3410     // All bits clear?
3411     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3412     // All sign bits clear?
3413     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3414     // Any bits set?
3415     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3416     // Any sign bits set?
3417     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3418
3419     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3420     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3421     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3422     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3423     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3424       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3425       AddToWorklist(Or.getNode());
3426       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3427     }
3428
3429     // All bits set?
3430     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3431     // All sign bits set?
3432     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3433     // Any bits clear?
3434     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3435     // Any sign bits clear?
3436     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3437
3438     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3439     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3440     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3441     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
3442     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3443       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3444       AddToWorklist(And.getNode());
3445       return DAG.getSetCC(DL, VT, And, LR, CC1);
3446     }
3447   }
3448
3449   // TODO: What is the 'or' equivalent of this fold?
3450   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3451   if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
3452       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3453        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3454     SDValue One = DAG.getConstant(1, DL, OpVT);
3455     SDValue Two = DAG.getConstant(2, DL, OpVT);
3456     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3457     AddToWorklist(Add.getNode());
3458     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3459   }
3460
3461   // Try more general transforms if the predicates match and the only user of
3462   // the compares is the 'and' or 'or'.
3463   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3464       N0.hasOneUse() && N1.hasOneUse()) {
3465     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3466     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3467     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3468       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3469       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3470       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3471       SDValue Zero = DAG.getConstant(0, DL, OpVT);
3472       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3473     }
3474   }
3475
3476   // Canonicalize equivalent operands to LL == RL.
3477   if (LL == RR && LR == RL) {
3478     CC1 = ISD::getSetCCSwappedOperands(CC1);
3479     std::swap(RL, RR);
3480   }
3481
3482   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3483   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3484   if (LL == RL && LR == RR) {
3485     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3486                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3487     if (NewCC != ISD::SETCC_INVALID &&
3488         (!LegalOperations ||
3489          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3490           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3491       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3492   }
3493
3494   return SDValue();
3495 }
3496
3497 /// This contains all DAGCombine rules which reduce two values combined by
3498 /// an And operation to a single value. This makes them reusable in the context
3499 /// of visitSELECT(). Rules involving constants are not included as
3500 /// visitSELECT() already handles those cases.
3501 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3502   EVT VT = N1.getValueType();
3503   SDLoc DL(N);
3504
3505   // fold (and x, undef) -> 0
3506   if (N0.isUndef() || N1.isUndef())
3507     return DAG.getConstant(0, DL, VT);
3508
3509   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3510     return V;
3511
3512   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3513       VT.getSizeInBits() <= 64) {
3514     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3515       APInt ADDC = ADDI->getAPIntValue();
3516       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3517         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3518         // immediate for an add, but it is legal if its top c2 bits are set,
3519         // transform the ADD so the immediate doesn't need to be materialized
3520         // in a register.
3521         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3522           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3523                                              SRLI->getZExtValue());
3524           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3525             ADDC |= Mask;
3526             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3527               SDLoc DL0(N0);
3528               SDValue NewAdd =
3529                 DAG.getNode(ISD::ADD, DL0, VT,
3530                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3531               CombineTo(N0.getNode(), NewAdd);
3532               // Return N so it doesn't get rechecked!
3533               return SDValue(N, 0);
3534             }
3535           }
3536         }
3537       }
3538     }
3539   }
3540
3541   // Reduce bit extract of low half of an integer to the narrower type.
3542   // (and (srl i64:x, K), KMask) ->
3543   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3544   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3545     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3546       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3547         unsigned Size = VT.getSizeInBits();
3548         const APInt &AndMask = CAnd->getAPIntValue();
3549         unsigned ShiftBits = CShift->getZExtValue();
3550
3551         // Bail out, this node will probably disappear anyway.
3552         if (ShiftBits == 0)
3553           return SDValue();
3554
3555         unsigned MaskBits = AndMask.countTrailingOnes();
3556         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3557
3558         if (AndMask.isMask() &&
3559             // Required bits must not span the two halves of the integer and
3560             // must fit in the half size type.
3561             (ShiftBits + MaskBits <= Size / 2) &&
3562             TLI.isNarrowingProfitable(VT, HalfVT) &&
3563             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3564             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3565             TLI.isTruncateFree(VT, HalfVT) &&
3566             TLI.isZExtFree(HalfVT, VT)) {
3567           // The isNarrowingProfitable is to avoid regressions on PPC and
3568           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3569           // on downstream users of this. Those patterns could probably be
3570           // extended to handle extensions mixed in.
3571
3572           SDValue SL(N0);
3573           assert(MaskBits <= Size);
3574
3575           // Extracting the highest bit of the low half.
3576           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3577           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3578                                       N0.getOperand(0));
3579
3580           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3581           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3582           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3583           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3584           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3585         }
3586       }
3587     }
3588   }
3589
3590   return SDValue();
3591 }
3592
3593 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3594                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3595                                    bool &NarrowLoad) {
3596   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3597
3598   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3599     return false;
3600
3601   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3602   LoadedVT = LoadN->getMemoryVT();
3603
3604   if (ExtVT == LoadedVT &&
3605       (!LegalOperations ||
3606        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3607     // ZEXTLOAD will match without needing to change the size of the value being
3608     // loaded.
3609     NarrowLoad = false;
3610     return true;
3611   }
3612
3613   // Do not change the width of a volatile load.
3614   if (LoadN->isVolatile())
3615     return false;
3616
3617   // Do not generate loads of non-round integer types since these can
3618   // be expensive (and would be wrong if the type is not byte sized).
3619   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3620     return false;
3621
3622   if (LegalOperations &&
3623       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3624     return false;
3625
3626   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3627     return false;
3628
3629   NarrowLoad = true;
3630   return true;
3631 }
3632
3633 SDValue DAGCombiner::visitAND(SDNode *N) {
3634   SDValue N0 = N->getOperand(0);
3635   SDValue N1 = N->getOperand(1);
3636   EVT VT = N1.getValueType();
3637
3638   // x & x --> x
3639   if (N0 == N1)
3640     return N0;
3641
3642   // fold vector ops
3643   if (VT.isVector()) {
3644     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3645       return FoldedVOp;
3646
3647     // fold (and x, 0) -> 0, vector edition
3648     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3649       // do not return N0, because undef node may exist in N0
3650       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3651                              SDLoc(N), N0.getValueType());
3652     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3653       // do not return N1, because undef node may exist in N1
3654       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3655                              SDLoc(N), N1.getValueType());
3656
3657     // fold (and x, -1) -> x, vector edition
3658     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3659       return N1;
3660     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3661       return N0;
3662   }
3663
3664   // fold (and c1, c2) -> c1&c2
3665   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3666   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3667   if (N0C && N1C && !N1C->isOpaque())
3668     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3669   // canonicalize constant to RHS
3670   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3671      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3672     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3673   // fold (and x, -1) -> x
3674   if (isAllOnesConstant(N1))
3675     return N0;
3676   // if (and x, c) is known to be zero, return 0
3677   unsigned BitWidth = VT.getScalarSizeInBits();
3678   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3679                                    APInt::getAllOnesValue(BitWidth)))
3680     return DAG.getConstant(0, SDLoc(N), VT);
3681
3682   if (SDValue NewSel = foldBinOpIntoSelect(N))
3683     return NewSel;
3684
3685   // reassociate and
3686   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3687     return RAND;
3688   // fold (and (or x, C), D) -> D if (C & D) == D
3689   if (N1C && N0.getOpcode() == ISD::OR)
3690     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3691       if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
3692         return N1;
3693   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3694   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3695     SDValue N0Op0 = N0.getOperand(0);
3696     APInt Mask = ~N1C->getAPIntValue();
3697     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3698     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3699       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3700                                  N0.getValueType(), N0Op0);
3701
3702       // Replace uses of the AND with uses of the Zero extend node.
3703       CombineTo(N, Zext);
3704
3705       // We actually want to replace all uses of the any_extend with the
3706       // zero_extend, to avoid duplicating things.  This will later cause this
3707       // AND to be folded.
3708       CombineTo(N0.getNode(), Zext);
3709       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3710     }
3711   }
3712   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3713   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3714   // already be zero by virtue of the width of the base type of the load.
3715   //
3716   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3717   // more cases.
3718   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3719        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3720        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3721        N0.getOperand(0).getResNo() == 0) ||
3722       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3723     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3724                                          N0 : N0.getOperand(0) );
3725
3726     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3727     // This can be a pure constant or a vector splat, in which case we treat the
3728     // vector as a scalar and use the splat value.
3729     APInt Constant = APInt::getNullValue(1);
3730     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3731       Constant = C->getAPIntValue();
3732     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3733       APInt SplatValue, SplatUndef;
3734       unsigned SplatBitSize;
3735       bool HasAnyUndefs;
3736       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3737                                              SplatBitSize, HasAnyUndefs);
3738       if (IsSplat) {
3739         // Undef bits can contribute to a possible optimisation if set, so
3740         // set them.
3741         SplatValue |= SplatUndef;
3742
3743         // The splat value may be something like "0x00FFFFFF", which means 0 for
3744         // the first vector value and FF for the rest, repeating. We need a mask
3745         // that will apply equally to all members of the vector, so AND all the
3746         // lanes of the constant together.
3747         EVT VT = Vector->getValueType(0);
3748         unsigned BitWidth = VT.getScalarSizeInBits();
3749
3750         // If the splat value has been compressed to a bitlength lower
3751         // than the size of the vector lane, we need to re-expand it to
3752         // the lane size.
3753         if (BitWidth > SplatBitSize)
3754           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3755                SplatBitSize < BitWidth;
3756                SplatBitSize = SplatBitSize * 2)
3757             SplatValue |= SplatValue.shl(SplatBitSize);
3758
3759         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3760         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3761         if (SplatBitSize % BitWidth == 0) {
3762           Constant = APInt::getAllOnesValue(BitWidth);
3763           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3764             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3765         }
3766       }
3767     }
3768
3769     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3770     // actually legal and isn't going to get expanded, else this is a false
3771     // optimisation.
3772     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3773                                                     Load->getValueType(0),
3774                                                     Load->getMemoryVT());
3775
3776     // Resize the constant to the same size as the original memory access before
3777     // extension. If it is still the AllOnesValue then this AND is completely
3778     // unneeded.
3779     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3780
3781     bool B;
3782     switch (Load->getExtensionType()) {
3783     default: B = false; break;
3784     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3785     case ISD::ZEXTLOAD:
3786     case ISD::NON_EXTLOAD: B = true; break;
3787     }
3788
3789     if (B && Constant.isAllOnesValue()) {
3790       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3791       // preserve semantics once we get rid of the AND.
3792       SDValue NewLoad(Load, 0);
3793
3794       // Fold the AND away. NewLoad may get replaced immediately.
3795       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3796
3797       if (Load->getExtensionType() == ISD::EXTLOAD) {
3798         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3799                               Load->getValueType(0), SDLoc(Load),
3800                               Load->getChain(), Load->getBasePtr(),
3801                               Load->getOffset(), Load->getMemoryVT(),
3802                               Load->getMemOperand());
3803         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3804         if (Load->getNumValues() == 3) {
3805           // PRE/POST_INC loads have 3 values.
3806           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3807                            NewLoad.getValue(2) };
3808           CombineTo(Load, To, 3, true);
3809         } else {
3810           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3811         }
3812       }
3813
3814       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3815     }
3816   }
3817
3818   // fold (and (load x), 255) -> (zextload x, i8)
3819   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3820   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3821   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3822                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3823                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3824     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3825     LoadSDNode *LN0 = HasAnyExt
3826       ? cast<LoadSDNode>(N0.getOperand(0))
3827       : cast<LoadSDNode>(N0);
3828     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3829         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3830       auto NarrowLoad = false;
3831       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3832       EVT ExtVT, LoadedVT;
3833       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3834                            NarrowLoad)) {
3835         if (!NarrowLoad) {
3836           SDValue NewLoad =
3837             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3838                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3839                            LN0->getMemOperand());
3840           AddToWorklist(N);
3841           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3842           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3843         } else {
3844           EVT PtrType = LN0->getOperand(1).getValueType();
3845
3846           unsigned Alignment = LN0->getAlignment();
3847           SDValue NewPtr = LN0->getBasePtr();
3848
3849           // For big endian targets, we need to add an offset to the pointer
3850           // to load the correct bytes.  For little endian systems, we merely
3851           // need to read fewer bytes from the same pointer.
3852           if (DAG.getDataLayout().isBigEndian()) {
3853             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3854             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3855             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3856             SDLoc DL(LN0);
3857             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3858                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3859             Alignment = MinAlign(Alignment, PtrOff);
3860           }
3861
3862           AddToWorklist(NewPtr.getNode());
3863
3864           SDValue Load = DAG.getExtLoad(
3865               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3866               LN0->getPointerInfo(), ExtVT, Alignment,
3867               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3868           AddToWorklist(N);
3869           CombineTo(LN0, Load, Load.getValue(1));
3870           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3871         }
3872       }
3873     }
3874   }
3875
3876   if (SDValue Combined = visitANDLike(N0, N1, N))
3877     return Combined;
3878
3879   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3880   if (N0.getOpcode() == N1.getOpcode())
3881     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3882       return Tmp;
3883
3884   // Masking the negated extension of a boolean is just the zero-extended
3885   // boolean:
3886   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3887   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3888   //
3889   // Note: the SimplifyDemandedBits fold below can make an information-losing
3890   // transform, and then we have no way to find this better fold.
3891   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3892     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3893     SDValue SubRHS = N0.getOperand(1);
3894     if (SubLHS && SubLHS->isNullValue()) {
3895       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3896           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3897         return SubRHS;
3898       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3899           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3900         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3901     }
3902   }
3903
3904   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3905   // fold (and (sra)) -> (and (srl)) when possible.
3906   if (SimplifyDemandedBits(SDValue(N, 0)))
3907     return SDValue(N, 0);
3908
3909   // fold (zext_inreg (extload x)) -> (zextload x)
3910   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3911     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3912     EVT MemVT = LN0->getMemoryVT();
3913     // If we zero all the possible extended bits, then we can turn this into
3914     // a zextload if we are running before legalize or the operation is legal.
3915     unsigned BitWidth = N1.getScalarValueSizeInBits();
3916     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3917                            BitWidth - MemVT.getScalarSizeInBits())) &&
3918         ((!LegalOperations && !LN0->isVolatile()) ||
3919          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3920       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3921                                        LN0->getChain(), LN0->getBasePtr(),
3922                                        MemVT, LN0->getMemOperand());
3923       AddToWorklist(N);
3924       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3925       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3926     }
3927   }
3928   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3929   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3930       N0.hasOneUse()) {
3931     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3932     EVT MemVT = LN0->getMemoryVT();
3933     // If we zero all the possible extended bits, then we can turn this into
3934     // a zextload if we are running before legalize or the operation is legal.
3935     unsigned BitWidth = N1.getScalarValueSizeInBits();
3936     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3937                            BitWidth - MemVT.getScalarSizeInBits())) &&
3938         ((!LegalOperations && !LN0->isVolatile()) ||
3939          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3940       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3941                                        LN0->getChain(), LN0->getBasePtr(),
3942                                        MemVT, LN0->getMemOperand());
3943       AddToWorklist(N);
3944       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3945       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3946     }
3947   }
3948   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3949   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3950     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3951                                            N0.getOperand(1), false))
3952       return BSwap;
3953   }
3954
3955   return SDValue();
3956 }
3957
3958 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3959 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3960                                         bool DemandHighBits) {
3961   if (!LegalOperations)
3962     return SDValue();
3963
3964   EVT VT = N->getValueType(0);
3965   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3966     return SDValue();
3967   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
3968     return SDValue();
3969
3970   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3971   bool LookPassAnd0 = false;
3972   bool LookPassAnd1 = false;
3973   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3974       std::swap(N0, N1);
3975   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3976       std::swap(N0, N1);
3977   if (N0.getOpcode() == ISD::AND) {
3978     if (!N0.getNode()->hasOneUse())
3979       return SDValue();
3980     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3981     if (!N01C || N01C->getZExtValue() != 0xFF00)
3982       return SDValue();
3983     N0 = N0.getOperand(0);
3984     LookPassAnd0 = true;
3985   }
3986
3987   if (N1.getOpcode() == ISD::AND) {
3988     if (!N1.getNode()->hasOneUse())
3989       return SDValue();
3990     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3991     if (!N11C || N11C->getZExtValue() != 0xFF)
3992       return SDValue();
3993     N1 = N1.getOperand(0);
3994     LookPassAnd1 = true;
3995   }
3996
3997   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3998     std::swap(N0, N1);
3999   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4000     return SDValue();
4001   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4002     return SDValue();
4003
4004   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4005   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4006   if (!N01C || !N11C)
4007     return SDValue();
4008   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4009     return SDValue();
4010
4011   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4012   SDValue N00 = N0->getOperand(0);
4013   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4014     if (!N00.getNode()->hasOneUse())
4015       return SDValue();
4016     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4017     if (!N001C || N001C->getZExtValue() != 0xFF)
4018       return SDValue();
4019     N00 = N00.getOperand(0);
4020     LookPassAnd0 = true;
4021   }
4022
4023   SDValue N10 = N1->getOperand(0);
4024   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4025     if (!N10.getNode()->hasOneUse())
4026       return SDValue();
4027     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4028     if (!N101C || N101C->getZExtValue() != 0xFF00)
4029       return SDValue();
4030     N10 = N10.getOperand(0);
4031     LookPassAnd1 = true;
4032   }
4033
4034   if (N00 != N10)
4035     return SDValue();
4036
4037   // Make sure everything beyond the low halfword gets set to zero since the SRL
4038   // 16 will clear the top bits.
4039   unsigned OpSizeInBits = VT.getSizeInBits();
4040   if (DemandHighBits && OpSizeInBits > 16) {
4041     // If the left-shift isn't masked out then the only way this is a bswap is
4042     // if all bits beyond the low 8 are 0. In that case the entire pattern
4043     // reduces to a left shift anyway: leave it for other parts of the combiner.
4044     if (!LookPassAnd0)
4045       return SDValue();
4046
4047     // However, if the right shift isn't masked out then it might be because
4048     // it's not needed. See if we can spot that too.
4049     if (!LookPassAnd1 &&
4050         !DAG.MaskedValueIsZero(
4051             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4052       return SDValue();
4053   }
4054
4055   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4056   if (OpSizeInBits > 16) {
4057     SDLoc DL(N);
4058     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4059                       DAG.getConstant(OpSizeInBits - 16, DL,
4060                                       getShiftAmountTy(VT)));
4061   }
4062   return Res;
4063 }
4064
4065 /// Return true if the specified node is an element that makes up a 32-bit
4066 /// packed halfword byteswap.
4067 /// ((x & 0x000000ff) << 8) |
4068 /// ((x & 0x0000ff00) >> 8) |
4069 /// ((x & 0x00ff0000) << 8) |
4070 /// ((x & 0xff000000) >> 8)
4071 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4072   if (!N.getNode()->hasOneUse())
4073     return false;
4074
4075   unsigned Opc = N.getOpcode();
4076   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4077     return false;
4078
4079   SDValue N0 = N.getOperand(0);
4080   unsigned Opc0 = N0.getOpcode();
4081   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4082     return false;
4083
4084   ConstantSDNode *N1C = nullptr;
4085   // SHL or SRL: look upstream for AND mask operand
4086   if (Opc == ISD::AND)
4087     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4088   else if (Opc0 == ISD::AND)
4089     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4090   if (!N1C)
4091     return false;
4092
4093   unsigned MaskByteOffset;
4094   switch (N1C->getZExtValue()) {
4095   default:
4096     return false;
4097   case 0xFF:       MaskByteOffset = 0; break;
4098   case 0xFF00:     MaskByteOffset = 1; break;
4099   case 0xFF0000:   MaskByteOffset = 2; break;
4100   case 0xFF000000: MaskByteOffset = 3; break;
4101   }
4102
4103   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4104   if (Opc == ISD::AND) {
4105     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4106       // (x >> 8) & 0xff
4107       // (x >> 8) & 0xff0000
4108       if (Opc0 != ISD::SRL)
4109         return false;
4110       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4111       if (!C || C->getZExtValue() != 8)
4112         return false;
4113     } else {
4114       // (x << 8) & 0xff00
4115       // (x << 8) & 0xff000000
4116       if (Opc0 != ISD::SHL)
4117         return false;
4118       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4119       if (!C || C->getZExtValue() != 8)
4120         return false;
4121     }
4122   } else if (Opc == ISD::SHL) {
4123     // (x & 0xff) << 8
4124     // (x & 0xff0000) << 8
4125     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4126       return false;
4127     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4128     if (!C || C->getZExtValue() != 8)
4129       return false;
4130   } else { // Opc == ISD::SRL
4131     // (x & 0xff00) >> 8
4132     // (x & 0xff000000) >> 8
4133     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4134       return false;
4135     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4136     if (!C || C->getZExtValue() != 8)
4137       return false;
4138   }
4139
4140   if (Parts[MaskByteOffset])
4141     return false;
4142
4143   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4144   return true;
4145 }
4146
4147 /// Match a 32-bit packed halfword bswap. That is
4148 /// ((x & 0x000000ff) << 8) |
4149 /// ((x & 0x0000ff00) >> 8) |
4150 /// ((x & 0x00ff0000) << 8) |
4151 /// ((x & 0xff000000) >> 8)
4152 /// => (rotl (bswap x), 16)
4153 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4154   if (!LegalOperations)
4155     return SDValue();
4156
4157   EVT VT = N->getValueType(0);
4158   if (VT != MVT::i32)
4159     return SDValue();
4160   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4161     return SDValue();
4162
4163   // Look for either
4164   // (or (or (and), (and)), (or (and), (and)))
4165   // (or (or (or (and), (and)), (and)), (and))
4166   if (N0.getOpcode() != ISD::OR)
4167     return SDValue();
4168   SDValue N00 = N0.getOperand(0);
4169   SDValue N01 = N0.getOperand(1);
4170   SDNode *Parts[4] = {};
4171
4172   if (N1.getOpcode() == ISD::OR &&
4173       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4174     // (or (or (and), (and)), (or (and), (and)))
4175     if (!isBSwapHWordElement(N00, Parts))
4176       return SDValue();
4177
4178     if (!isBSwapHWordElement(N01, Parts))
4179       return SDValue();
4180     SDValue N10 = N1.getOperand(0);
4181     if (!isBSwapHWordElement(N10, Parts))
4182       return SDValue();
4183     SDValue N11 = N1.getOperand(1);
4184     if (!isBSwapHWordElement(N11, Parts))
4185       return SDValue();
4186   } else {
4187     // (or (or (or (and), (and)), (and)), (and))
4188     if (!isBSwapHWordElement(N1, Parts))
4189       return SDValue();
4190     if (!isBSwapHWordElement(N01, Parts))
4191       return SDValue();
4192     if (N00.getOpcode() != ISD::OR)
4193       return SDValue();
4194     SDValue N000 = N00.getOperand(0);
4195     if (!isBSwapHWordElement(N000, Parts))
4196       return SDValue();
4197     SDValue N001 = N00.getOperand(1);
4198     if (!isBSwapHWordElement(N001, Parts))
4199       return SDValue();
4200   }
4201
4202   // Make sure the parts are all coming from the same node.
4203   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4204     return SDValue();
4205
4206   SDLoc DL(N);
4207   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4208                               SDValue(Parts[0], 0));
4209
4210   // Result of the bswap should be rotated by 16. If it's not legal, then
4211   // do  (x << 16) | (x >> 16).
4212   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4213   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4214     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4215   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4216     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4217   return DAG.getNode(ISD::OR, DL, VT,
4218                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4219                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4220 }
4221
4222 /// This contains all DAGCombine rules which reduce two values combined by
4223 /// an Or operation to a single value \see visitANDLike().
4224 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4225   EVT VT = N1.getValueType();
4226   SDLoc DL(N);
4227
4228   // fold (or x, undef) -> -1
4229   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4230     return DAG.getAllOnesConstant(DL, VT);
4231
4232   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4233     return V;
4234
4235   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4236   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4237       // Don't increase # computations.
4238       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4239     // We can only do this xform if we know that bits from X that are set in C2
4240     // but not in C1 are already zero.  Likewise for Y.
4241     if (const ConstantSDNode *N0O1C =
4242         getAsNonOpaqueConstant(N0.getOperand(1))) {
4243       if (const ConstantSDNode *N1O1C =
4244           getAsNonOpaqueConstant(N1.getOperand(1))) {
4245         // We can only do this xform if we know that bits from X that are set in
4246         // C2 but not in C1 are already zero.  Likewise for Y.
4247         const APInt &LHSMask = N0O1C->getAPIntValue();
4248         const APInt &RHSMask = N1O1C->getAPIntValue();
4249
4250         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4251             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4252           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4253                                   N0.getOperand(0), N1.getOperand(0));
4254           return DAG.getNode(ISD::AND, DL, VT, X,
4255                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4256         }
4257       }
4258     }
4259   }
4260
4261   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4262   if (N0.getOpcode() == ISD::AND &&
4263       N1.getOpcode() == ISD::AND &&
4264       N0.getOperand(0) == N1.getOperand(0) &&
4265       // Don't increase # computations.
4266       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4267     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4268                             N0.getOperand(1), N1.getOperand(1));
4269     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4270   }
4271
4272   return SDValue();
4273 }
4274
4275 SDValue DAGCombiner::visitOR(SDNode *N) {
4276   SDValue N0 = N->getOperand(0);
4277   SDValue N1 = N->getOperand(1);
4278   EVT VT = N1.getValueType();
4279
4280   // x | x --> x
4281   if (N0 == N1)
4282     return N0;
4283
4284   // fold vector ops
4285   if (VT.isVector()) {
4286     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4287       return FoldedVOp;
4288
4289     // fold (or x, 0) -> x, vector edition
4290     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4291       return N1;
4292     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4293       return N0;
4294
4295     // fold (or x, -1) -> -1, vector edition
4296     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4297       // do not return N0, because undef node may exist in N0
4298       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4299     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4300       // do not return N1, because undef node may exist in N1
4301       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4302
4303     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4304     // Do this only if the resulting shuffle is legal.
4305     if (isa<ShuffleVectorSDNode>(N0) &&
4306         isa<ShuffleVectorSDNode>(N1) &&
4307         // Avoid folding a node with illegal type.
4308         TLI.isTypeLegal(VT)) {
4309       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4310       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4311       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4312       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4313       // Ensure both shuffles have a zero input.
4314       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4315         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4316         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4317         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4318         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4319         bool CanFold = true;
4320         int NumElts = VT.getVectorNumElements();
4321         SmallVector<int, 4> Mask(NumElts);
4322
4323         for (int i = 0; i != NumElts; ++i) {
4324           int M0 = SV0->getMaskElt(i);
4325           int M1 = SV1->getMaskElt(i);
4326
4327           // Determine if either index is pointing to a zero vector.
4328           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4329           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4330
4331           // If one element is zero and the otherside is undef, keep undef.
4332           // This also handles the case that both are undef.
4333           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4334             Mask[i] = -1;
4335             continue;
4336           }
4337
4338           // Make sure only one of the elements is zero.
4339           if (M0Zero == M1Zero) {
4340             CanFold = false;
4341             break;
4342           }
4343
4344           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4345
4346           // We have a zero and non-zero element. If the non-zero came from
4347           // SV0 make the index a LHS index. If it came from SV1, make it
4348           // a RHS index. We need to mod by NumElts because we don't care
4349           // which operand it came from in the original shuffles.
4350           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4351         }
4352
4353         if (CanFold) {
4354           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4355           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4356
4357           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4358           if (!LegalMask) {
4359             std::swap(NewLHS, NewRHS);
4360             ShuffleVectorSDNode::commuteMask(Mask);
4361             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4362           }
4363
4364           if (LegalMask)
4365             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4366         }
4367       }
4368     }
4369   }
4370
4371   // fold (or c1, c2) -> c1|c2
4372   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4373   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4374   if (N0C && N1C && !N1C->isOpaque())
4375     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4376   // canonicalize constant to RHS
4377   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4378      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4379     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4380   // fold (or x, 0) -> x
4381   if (isNullConstant(N1))
4382     return N0;
4383   // fold (or x, -1) -> -1
4384   if (isAllOnesConstant(N1))
4385     return N1;
4386
4387   if (SDValue NewSel = foldBinOpIntoSelect(N))
4388     return NewSel;
4389
4390   // fold (or x, c) -> c iff (x & ~c) == 0
4391   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4392     return N1;
4393
4394   if (SDValue Combined = visitORLike(N0, N1, N))
4395     return Combined;
4396
4397   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4398   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4399     return BSwap;
4400   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4401     return BSwap;
4402
4403   // reassociate or
4404   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4405     return ROR;
4406
4407   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4408   // iff (c1 & c2) != 0.
4409   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
4410     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4411       if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
4412         if (SDValue COR =
4413                 DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
4414           return DAG.getNode(
4415               ISD::AND, SDLoc(N), VT,
4416               DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4417         return SDValue();
4418       }
4419     }
4420   }
4421
4422   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4423   if (N0.getOpcode() == N1.getOpcode())
4424     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4425       return Tmp;
4426
4427   // See if this is some rotate idiom.
4428   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4429     return SDValue(Rot, 0);
4430
4431   if (SDValue Load = MatchLoadCombine(N))
4432     return Load;
4433
4434   // Simplify the operands using demanded-bits information.
4435   if (SimplifyDemandedBits(SDValue(N, 0)))
4436     return SDValue(N, 0);
4437
4438   return SDValue();
4439 }
4440
4441 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4442 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4443   if (Op.getOpcode() == ISD::AND) {
4444     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4445       Mask = Op.getOperand(1);
4446       Op = Op.getOperand(0);
4447     } else {
4448       return false;
4449     }
4450   }
4451
4452   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4453     Shift = Op;
4454     return true;
4455   }
4456
4457   return false;
4458 }
4459
4460 // Return true if we can prove that, whenever Neg and Pos are both in the
4461 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4462 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4463 //
4464 //     (or (shift1 X, Neg), (shift2 X, Pos))
4465 //
4466 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4467 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4468 // to consider shift amounts with defined behavior.
4469 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4470   // If EltSize is a power of 2 then:
4471   //
4472   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4473   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4474   //
4475   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4476   // for the stronger condition:
4477   //
4478   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4479   //
4480   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4481   // we can just replace Neg with Neg' for the rest of the function.
4482   //
4483   // In other cases we check for the even stronger condition:
4484   //
4485   //     Neg == EltSize - Pos                                    [B]
4486   //
4487   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4488   // behavior if Pos == 0 (and consequently Neg == EltSize).
4489   //
4490   // We could actually use [A] whenever EltSize is a power of 2, but the
4491   // only extra cases that it would match are those uninteresting ones
4492   // where Neg and Pos are never in range at the same time.  E.g. for
4493   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4494   // as well as (sub 32, Pos), but:
4495   //
4496   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4497   //
4498   // always invokes undefined behavior for 32-bit X.
4499   //
4500   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4501   unsigned MaskLoBits = 0;
4502   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4503     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4504       if (NegC->getAPIntValue() == EltSize - 1) {
4505         Neg = Neg.getOperand(0);
4506         MaskLoBits = Log2_64(EltSize);
4507       }
4508     }
4509   }
4510
4511   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4512   if (Neg.getOpcode() != ISD::SUB)
4513     return false;
4514   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4515   if (!NegC)
4516     return false;
4517   SDValue NegOp1 = Neg.getOperand(1);
4518
4519   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4520   // Pos'.  The truncation is redundant for the purpose of the equality.
4521   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4522     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4523       if (PosC->getAPIntValue() == EltSize - 1)
4524         Pos = Pos.getOperand(0);
4525
4526   // The condition we need is now:
4527   //
4528   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4529   //
4530   // If NegOp1 == Pos then we need:
4531   //
4532   //              EltSize & Mask == NegC & Mask
4533   //
4534   // (because "x & Mask" is a truncation and distributes through subtraction).
4535   APInt Width;
4536   if (Pos == NegOp1)
4537     Width = NegC->getAPIntValue();
4538
4539   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4540   // Then the condition we want to prove becomes:
4541   //
4542   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4543   //
4544   // which, again because "x & Mask" is a truncation, becomes:
4545   //
4546   //                NegC & Mask == (EltSize - PosC) & Mask
4547   //             EltSize & Mask == (NegC + PosC) & Mask
4548   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4549     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4550       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4551     else
4552       return false;
4553   } else
4554     return false;
4555
4556   // Now we just need to check that EltSize & Mask == Width & Mask.
4557   if (MaskLoBits)
4558     // EltSize & Mask is 0 since Mask is EltSize - 1.
4559     return Width.getLoBits(MaskLoBits) == 0;
4560   return Width == EltSize;
4561 }
4562
4563 // A subroutine of MatchRotate used once we have found an OR of two opposite
4564 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4565 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4566 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4567 // Neg with outer conversions stripped away.
4568 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4569                                        SDValue Neg, SDValue InnerPos,
4570                                        SDValue InnerNeg, unsigned PosOpcode,
4571                                        unsigned NegOpcode, const SDLoc &DL) {
4572   // fold (or (shl x, (*ext y)),
4573   //          (srl x, (*ext (sub 32, y)))) ->
4574   //   (rotl x, y) or (rotr x, (sub 32, y))
4575   //
4576   // fold (or (shl x, (*ext (sub 32, y))),
4577   //          (srl x, (*ext y))) ->
4578   //   (rotr x, y) or (rotl x, (sub 32, y))
4579   EVT VT = Shifted.getValueType();
4580   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4581     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4582     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4583                        HasPos ? Pos : Neg).getNode();
4584   }
4585
4586   return nullptr;
4587 }
4588
4589 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4590 // idioms for rotate, and if the target supports rotation instructions, generate
4591 // a rot[lr].
4592 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4593   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4594   EVT VT = LHS.getValueType();
4595   if (!TLI.isTypeLegal(VT)) return nullptr;
4596
4597   // The target must have at least one rotate flavor.
4598   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4599   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4600   if (!HasROTL && !HasROTR) return nullptr;
4601
4602   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4603   SDValue LHSShift;   // The shift.
4604   SDValue LHSMask;    // AND value if any.
4605   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4606     return nullptr; // Not part of a rotate.
4607
4608   SDValue RHSShift;   // The shift.
4609   SDValue RHSMask;    // AND value if any.
4610   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4611     return nullptr; // Not part of a rotate.
4612
4613   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4614     return nullptr;   // Not shifting the same value.
4615
4616   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4617     return nullptr;   // Shifts must disagree.
4618
4619   // Canonicalize shl to left side in a shl/srl pair.
4620   if (RHSShift.getOpcode() == ISD::SHL) {
4621     std::swap(LHS, RHS);
4622     std::swap(LHSShift, RHSShift);
4623     std::swap(LHSMask, RHSMask);
4624   }
4625
4626   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4627   SDValue LHSShiftArg = LHSShift.getOperand(0);
4628   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4629   SDValue RHSShiftArg = RHSShift.getOperand(0);
4630   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4631
4632   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4633   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4634   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4635     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4636     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4637     if ((LShVal + RShVal) != EltSizeInBits)
4638       return nullptr;
4639
4640     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4641                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4642
4643     // If there is an AND of either shifted operand, apply it to the result.
4644     if (LHSMask.getNode() || RHSMask.getNode()) {
4645       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
4646
4647       if (LHSMask.getNode()) {
4648         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4649         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4650                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
4651                                        DAG.getConstant(RHSBits, DL, VT)));
4652       }
4653       if (RHSMask.getNode()) {
4654         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4655         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4656                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
4657                                        DAG.getConstant(LHSBits, DL, VT)));
4658       }
4659
4660       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4661     }
4662
4663     return Rot.getNode();
4664   }
4665
4666   // If there is a mask here, and we have a variable shift, we can't be sure
4667   // that we're masking out the right stuff.
4668   if (LHSMask.getNode() || RHSMask.getNode())
4669     return nullptr;
4670
4671   // If the shift amount is sign/zext/any-extended just peel it off.
4672   SDValue LExtOp0 = LHSShiftAmt;
4673   SDValue RExtOp0 = RHSShiftAmt;
4674   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4675        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4676        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4677        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4678       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4679        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4680        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4681        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4682     LExtOp0 = LHSShiftAmt.getOperand(0);
4683     RExtOp0 = RHSShiftAmt.getOperand(0);
4684   }
4685
4686   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4687                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4688   if (TryL)
4689     return TryL;
4690
4691   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4692                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4693   if (TryR)
4694     return TryR;
4695
4696   return nullptr;
4697 }
4698
4699 namespace {
4700 /// Represents known origin of an individual byte in load combine pattern. The
4701 /// value of the byte is either constant zero or comes from memory.
4702 struct ByteProvider {
4703   // For constant zero providers Load is set to nullptr. For memory providers
4704   // Load represents the node which loads the byte from memory.
4705   // ByteOffset is the offset of the byte in the value produced by the load.
4706   LoadSDNode *Load;
4707   unsigned ByteOffset;
4708
4709   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4710
4711   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4712     return ByteProvider(Load, ByteOffset);
4713   }
4714   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4715
4716   bool isConstantZero() const { return !Load; }
4717   bool isMemory() const { return Load; }
4718
4719   bool operator==(const ByteProvider &Other) const {
4720     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4721   }
4722
4723 private:
4724   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4725       : Load(Load), ByteOffset(ByteOffset) {}
4726 };
4727
4728 /// Recursively traverses the expression calculating the origin of the requested
4729 /// byte of the given value. Returns None if the provider can't be calculated.
4730 ///
4731 /// For all the values except the root of the expression verifies that the value
4732 /// has exactly one use and if it's not true return None. This way if the origin
4733 /// of the byte is returned it's guaranteed that the values which contribute to
4734 /// the byte are not used outside of this expression.
4735 ///
4736 /// Because the parts of the expression are not allowed to have more than one
4737 /// use this function iterates over trees, not DAGs. So it never visits the same
4738 /// node more than once.
4739 const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
4740                                                    unsigned Depth,
4741                                                    bool Root = false) {
4742   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4743   if (Depth == 10)
4744     return None;
4745
4746   if (!Root && !Op.hasOneUse())
4747     return None;
4748
4749   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4750   unsigned BitWidth = Op.getValueSizeInBits();
4751   if (BitWidth % 8 != 0)
4752     return None;
4753   unsigned ByteWidth = BitWidth / 8;
4754   assert(Index < ByteWidth && "invalid index requested");
4755   (void) ByteWidth;
4756
4757   switch (Op.getOpcode()) {
4758   case ISD::OR: {
4759     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4760     if (!LHS)
4761       return None;
4762     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4763     if (!RHS)
4764       return None;
4765
4766     if (LHS->isConstantZero())
4767       return RHS;
4768     if (RHS->isConstantZero())
4769       return LHS;
4770     return None;
4771   }
4772   case ISD::SHL: {
4773     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4774     if (!ShiftOp)
4775       return None;
4776
4777     uint64_t BitShift = ShiftOp->getZExtValue();
4778     if (BitShift % 8 != 0)
4779       return None;
4780     uint64_t ByteShift = BitShift / 8;
4781
4782     return Index < ByteShift
4783                ? ByteProvider::getConstantZero()
4784                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4785                                        Depth + 1);
4786   }
4787   case ISD::ANY_EXTEND:
4788   case ISD::SIGN_EXTEND:
4789   case ISD::ZERO_EXTEND: {
4790     SDValue NarrowOp = Op->getOperand(0);
4791     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4792     if (NarrowBitWidth % 8 != 0)
4793       return None;
4794     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4795
4796     if (Index >= NarrowByteWidth)
4797       return Op.getOpcode() == ISD::ZERO_EXTEND
4798                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4799                  : None;
4800     return calculateByteProvider(NarrowOp, Index, Depth + 1);
4801   }
4802   case ISD::BSWAP:
4803     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4804                                  Depth + 1);
4805   case ISD::LOAD: {
4806     auto L = cast<LoadSDNode>(Op.getNode());
4807     if (L->isVolatile() || L->isIndexed())
4808       return None;
4809
4810     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
4811     if (NarrowBitWidth % 8 != 0)
4812       return None;
4813     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4814
4815     if (Index >= NarrowByteWidth)
4816       return L->getExtensionType() == ISD::ZEXTLOAD
4817                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4818                  : None;
4819     return ByteProvider::getMemory(L, Index);
4820   }
4821   }
4822
4823   return None;
4824 }
4825 } // namespace
4826
4827 /// Match a pattern where a wide type scalar value is loaded by several narrow
4828 /// loads and combined by shifts and ors. Fold it into a single load or a load
4829 /// and a BSWAP if the targets supports it.
4830 ///
4831 /// Assuming little endian target:
4832 ///  i8 *a = ...
4833 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4834 /// =>
4835 ///  i32 val = *((i32)a)
4836 ///
4837 ///  i8 *a = ...
4838 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4839 /// =>
4840 ///  i32 val = BSWAP(*((i32)a))
4841 ///
4842 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4843 /// interact well with the worklist mechanism. When a part of the pattern is
4844 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4845 /// but the root node of the pattern which triggers the load combine is not
4846 /// necessarily a direct user of the changed node. For example, once the address
4847 /// of t28 load is reassociated load combine won't be triggered:
4848 ///             t25: i32 = add t4, Constant:i32<2>
4849 ///           t26: i64 = sign_extend t25
4850 ///        t27: i64 = add t2, t26
4851 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4852 ///     t29: i32 = zero_extend t28
4853 ///   t32: i32 = shl t29, Constant:i8<8>
4854 /// t33: i32 = or t23, t32
4855 /// As a possible fix visitLoad can check if the load can be a part of a load
4856 /// combine pattern and add corresponding OR roots to the worklist.
4857 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4858   assert(N->getOpcode() == ISD::OR &&
4859          "Can only match load combining against OR nodes");
4860
4861   // Handles simple types only
4862   EVT VT = N->getValueType(0);
4863   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4864     return SDValue();
4865   unsigned ByteWidth = VT.getSizeInBits() / 8;
4866
4867   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4868   // Before legalize we can introduce too wide illegal loads which will be later
4869   // split into legal sized loads. This enables us to combine i64 load by i8
4870   // patterns to a couple of i32 loads on 32 bit targets.
4871   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4872     return SDValue();
4873
4874   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4875     unsigned BW, unsigned i) { return i; };
4876   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4877     unsigned BW, unsigned i) { return BW - i - 1; };
4878
4879   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4880   auto MemoryByteOffset = [&] (ByteProvider P) {
4881     assert(P.isMemory() && "Must be a memory byte provider");
4882     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4883     assert(LoadBitWidth % 8 == 0 &&
4884            "can only analyze providers for individual bytes not bit");
4885     unsigned LoadByteWidth = LoadBitWidth / 8;
4886     return IsBigEndianTarget
4887             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4888             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4889   };
4890
4891   Optional<BaseIndexOffset> Base;
4892   SDValue Chain;
4893
4894   SmallSet<LoadSDNode *, 8> Loads;
4895   Optional<ByteProvider> FirstByteProvider;
4896   int64_t FirstOffset = INT64_MAX;
4897
4898   // Check if all the bytes of the OR we are looking at are loaded from the same
4899   // base address. Collect bytes offsets from Base address in ByteOffsets.
4900   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4901   for (unsigned i = 0; i < ByteWidth; i++) {
4902     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4903     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4904       return SDValue();
4905
4906     LoadSDNode *L = P->Load;
4907     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4908            "Must be enforced by calculateByteProvider");
4909     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4910
4911     // All loads must share the same chain
4912     SDValue LChain = L->getChain();
4913     if (!Chain)
4914       Chain = LChain;
4915     else if (Chain != LChain)
4916       return SDValue();
4917
4918     // Loads must share the same base address
4919     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
4920     int64_t ByteOffsetFromBase = 0;
4921     if (!Base)
4922       Base = Ptr;
4923     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
4924       return SDValue();
4925
4926     // Calculate the offset of the current byte from the base address
4927     ByteOffsetFromBase += MemoryByteOffset(*P);
4928     ByteOffsets[i] = ByteOffsetFromBase;
4929
4930     // Remember the first byte load
4931     if (ByteOffsetFromBase < FirstOffset) {
4932       FirstByteProvider = P;
4933       FirstOffset = ByteOffsetFromBase;
4934     }
4935
4936     Loads.insert(L);
4937   }
4938   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
4939          "memory, so there must be at least one load which produces the value");
4940   assert(Base && "Base address of the accessed memory location must be set");
4941   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
4942
4943   // Check if the bytes of the OR we are looking at match with either big or
4944   // little endian value load
4945   bool BigEndian = true, LittleEndian = true;
4946   for (unsigned i = 0; i < ByteWidth; i++) {
4947     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
4948     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
4949     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
4950     if (!BigEndian && !LittleEndian)
4951       return SDValue();
4952   }
4953   assert((BigEndian != LittleEndian) && "should be either or");
4954   assert(FirstByteProvider && "must be set");
4955
4956   // Ensure that the first byte is loaded from zero offset of the first load.
4957   // So the combined value can be loaded from the first load address.
4958   if (MemoryByteOffset(*FirstByteProvider) != 0)
4959     return SDValue();
4960   LoadSDNode *FirstLoad = FirstByteProvider->Load;
4961
4962   // The node we are looking at matches with the pattern, check if we can
4963   // replace it with a single load and bswap if needed.
4964
4965   // If the load needs byte swap check if the target supports it
4966   bool NeedsBswap = IsBigEndianTarget != BigEndian;
4967
4968   // Before legalize we can introduce illegal bswaps which will be later
4969   // converted to an explicit bswap sequence. This way we end up with a single
4970   // load and byte shuffling instead of several loads and byte shuffling.
4971   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
4972     return SDValue();
4973
4974   // Check that a load of the wide type is both allowed and fast on the target
4975   bool Fast = false;
4976   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
4977                                         VT, FirstLoad->getAddressSpace(),
4978                                         FirstLoad->getAlignment(), &Fast);
4979   if (!Allowed || !Fast)
4980     return SDValue();
4981
4982   SDValue NewLoad =
4983       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
4984                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
4985
4986   // Transfer chain users from old loads to the new load.
4987   for (LoadSDNode *L : Loads)
4988     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
4989
4990   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
4991 }
4992
4993 SDValue DAGCombiner::visitXOR(SDNode *N) {
4994   SDValue N0 = N->getOperand(0);
4995   SDValue N1 = N->getOperand(1);
4996   EVT VT = N0.getValueType();
4997
4998   // fold vector ops
4999   if (VT.isVector()) {
5000     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5001       return FoldedVOp;
5002
5003     // fold (xor x, 0) -> x, vector edition
5004     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5005       return N1;
5006     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5007       return N0;
5008   }
5009
5010   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5011   if (N0.isUndef() && N1.isUndef())
5012     return DAG.getConstant(0, SDLoc(N), VT);
5013   // fold (xor x, undef) -> undef
5014   if (N0.isUndef())
5015     return N0;
5016   if (N1.isUndef())
5017     return N1;
5018   // fold (xor c1, c2) -> c1^c2
5019   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5020   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5021   if (N0C && N1C)
5022     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5023   // canonicalize constant to RHS
5024   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5025      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5026     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5027   // fold (xor x, 0) -> x
5028   if (isNullConstant(N1))
5029     return N0;
5030
5031   if (SDValue NewSel = foldBinOpIntoSelect(N))
5032     return NewSel;
5033
5034   // reassociate xor
5035   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
5036     return RXOR;
5037
5038   // fold !(x cc y) -> (x !cc y)
5039   SDValue LHS, RHS, CC;
5040   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
5041     bool isInt = LHS.getValueType().isInteger();
5042     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
5043                                                isInt);
5044
5045     if (!LegalOperations ||
5046         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
5047       switch (N0.getOpcode()) {
5048       default:
5049         llvm_unreachable("Unhandled SetCC Equivalent!");
5050       case ISD::SETCC:
5051         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
5052       case ISD::SELECT_CC:
5053         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
5054                                N0.getOperand(3), NotCC);
5055       }
5056     }
5057   }
5058
5059   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
5060   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
5061       N0.getNode()->hasOneUse() &&
5062       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
5063     SDValue V = N0.getOperand(0);
5064     SDLoc DL(N0);
5065     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
5066                     DAG.getConstant(1, DL, V.getValueType()));
5067     AddToWorklist(V.getNode());
5068     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
5069   }
5070
5071   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
5072   if (isOneConstant(N1) && VT == MVT::i1 &&
5073       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5074     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5075     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
5076       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5077       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5078       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5079       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5080       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5081     }
5082   }
5083   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
5084   if (isAllOnesConstant(N1) &&
5085       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5086     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5087     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
5088       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5089       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5090       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5091       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5092       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5093     }
5094   }
5095   // fold (xor (and x, y), y) -> (and (not x), y)
5096   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5097       N0->getOperand(1) == N1) {
5098     SDValue X = N0->getOperand(0);
5099     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
5100     AddToWorklist(NotX.getNode());
5101     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
5102   }
5103   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
5104   if (N1C && N0.getOpcode() == ISD::XOR) {
5105     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
5106       SDLoc DL(N);
5107       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
5108                          DAG.getConstant(N1C->getAPIntValue() ^
5109                                          N00C->getAPIntValue(), DL, VT));
5110     }
5111     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
5112       SDLoc DL(N);
5113       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
5114                          DAG.getConstant(N1C->getAPIntValue() ^
5115                                          N01C->getAPIntValue(), DL, VT));
5116     }
5117   }
5118
5119   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5120   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5121   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5122       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5123       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5124     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5125       if (C->getAPIntValue() == (OpSizeInBits - 1))
5126         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5127   }
5128
5129   // fold (xor x, x) -> 0
5130   if (N0 == N1)
5131     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5132
5133   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5134   // Here is a concrete example of this equivalence:
5135   // i16   x ==  14
5136   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5137   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5138   //
5139   // =>
5140   //
5141   // i16     ~1      == 0b1111111111111110
5142   // i16 rol(~1, 14) == 0b1011111111111111
5143   //
5144   // Some additional tips to help conceptualize this transform:
5145   // - Try to see the operation as placing a single zero in a value of all ones.
5146   // - There exists no value for x which would allow the result to contain zero.
5147   // - Values of x larger than the bitwidth are undefined and do not require a
5148   //   consistent result.
5149   // - Pushing the zero left requires shifting one bits in from the right.
5150   // A rotate left of ~1 is a nice way of achieving the desired result.
5151   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5152       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5153     SDLoc DL(N);
5154     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5155                        N0.getOperand(1));
5156   }
5157
5158   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5159   if (N0.getOpcode() == N1.getOpcode())
5160     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5161       return Tmp;
5162
5163   // Simplify the expression using non-local knowledge.
5164   if (SimplifyDemandedBits(SDValue(N, 0)))
5165     return SDValue(N, 0);
5166
5167   return SDValue();
5168 }
5169
5170 /// Handle transforms common to the three shifts, when the shift amount is a
5171 /// constant.
5172 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5173   SDNode *LHS = N->getOperand(0).getNode();
5174   if (!LHS->hasOneUse()) return SDValue();
5175
5176   // We want to pull some binops through shifts, so that we have (and (shift))
5177   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5178   // thing happens with address calculations, so it's important to canonicalize
5179   // it.
5180   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5181
5182   switch (LHS->getOpcode()) {
5183   default: return SDValue();
5184   case ISD::OR:
5185   case ISD::XOR:
5186     HighBitSet = false; // We can only transform sra if the high bit is clear.
5187     break;
5188   case ISD::AND:
5189     HighBitSet = true;  // We can only transform sra if the high bit is set.
5190     break;
5191   case ISD::ADD:
5192     if (N->getOpcode() != ISD::SHL)
5193       return SDValue(); // only shl(add) not sr[al](add).
5194     HighBitSet = false; // We can only transform sra if the high bit is clear.
5195     break;
5196   }
5197
5198   // We require the RHS of the binop to be a constant and not opaque as well.
5199   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5200   if (!BinOpCst) return SDValue();
5201
5202   // FIXME: disable this unless the input to the binop is a shift by a constant
5203   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5204   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5205   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5206                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5207                  BinOpLHSVal->getOpcode() == ISD::SRL;
5208   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5209                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5210
5211   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5212       !isCopyOrSelect)
5213     return SDValue();
5214
5215   if (isCopyOrSelect && N->hasOneUse())
5216     return SDValue();
5217
5218   EVT VT = N->getValueType(0);
5219
5220   // If this is a signed shift right, and the high bit is modified by the
5221   // logical operation, do not perform the transformation. The highBitSet
5222   // boolean indicates the value of the high bit of the constant which would
5223   // cause it to be modified for this operation.
5224   if (N->getOpcode() == ISD::SRA) {
5225     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5226     if (BinOpRHSSignSet != HighBitSet)
5227       return SDValue();
5228   }
5229
5230   if (!TLI.isDesirableToCommuteWithShift(LHS))
5231     return SDValue();
5232
5233   // Fold the constants, shifting the binop RHS by the shift amount.
5234   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5235                                N->getValueType(0),
5236                                LHS->getOperand(1), N->getOperand(1));
5237   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5238
5239   // Create the new shift.
5240   SDValue NewShift = DAG.getNode(N->getOpcode(),
5241                                  SDLoc(LHS->getOperand(0)),
5242                                  VT, LHS->getOperand(0), N->getOperand(1));
5243
5244   // Create the new binop.
5245   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5246 }
5247
5248 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5249   assert(N->getOpcode() == ISD::TRUNCATE);
5250   assert(N->getOperand(0).getOpcode() == ISD::AND);
5251
5252   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5253   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5254     SDValue N01 = N->getOperand(0).getOperand(1);
5255     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5256       SDLoc DL(N);
5257       EVT TruncVT = N->getValueType(0);
5258       SDValue N00 = N->getOperand(0).getOperand(0);
5259       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5260       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5261       AddToWorklist(Trunc00.getNode());
5262       AddToWorklist(Trunc01.getNode());
5263       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5264     }
5265   }
5266
5267   return SDValue();
5268 }
5269
5270 SDValue DAGCombiner::visitRotate(SDNode *N) {
5271   SDLoc dl(N);
5272   SDValue N0 = N->getOperand(0);
5273   SDValue N1 = N->getOperand(1);
5274   EVT VT = N->getValueType(0);
5275
5276   // fold (rot x, 0) -> x
5277   if (isNullConstantOrNullSplatConstant(N1))
5278     return N0;
5279
5280   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5281   if (N1.getOpcode() == ISD::TRUNCATE &&
5282       N1.getOperand(0).getOpcode() == ISD::AND) {
5283     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5284       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
5285   }
5286
5287   unsigned NextOp = N0.getOpcode();
5288   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
5289   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR)
5290     if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1))
5291       if (SDNode *C2 =
5292           DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
5293         bool SameSide = (N->getOpcode() == NextOp);
5294         unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
5295         if (SDValue CombinedShift =
5296             DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) {
5297           unsigned Bitsize = VT.getScalarSizeInBits();
5298           SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT);
5299           SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
5300             ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode());
5301           return DAG.getNode(
5302             N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm);
5303         }
5304       }
5305   return SDValue();
5306 }
5307
5308 SDValue DAGCombiner::visitSHL(SDNode *N) {
5309   SDValue N0 = N->getOperand(0);
5310   SDValue N1 = N->getOperand(1);
5311   EVT VT = N0.getValueType();
5312   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5313
5314   // fold vector ops
5315   if (VT.isVector()) {
5316     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5317       return FoldedVOp;
5318
5319     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5320     // If setcc produces all-one true value then:
5321     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5322     if (N1CV && N1CV->isConstant()) {
5323       if (N0.getOpcode() == ISD::AND) {
5324         SDValue N00 = N0->getOperand(0);
5325         SDValue N01 = N0->getOperand(1);
5326         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5327
5328         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5329             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5330                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5331           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5332                                                      N01CV, N1CV))
5333             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5334         }
5335       }
5336     }
5337   }
5338
5339   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5340
5341   // fold (shl c1, c2) -> c1<<c2
5342   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5343   if (N0C && N1C && !N1C->isOpaque())
5344     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5345   // fold (shl 0, x) -> 0
5346   if (isNullConstantOrNullSplatConstant(N0))
5347     return N0;
5348   // fold (shl x, c >= size(x)) -> undef
5349   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5350     return DAG.getUNDEF(VT);
5351   // fold (shl x, 0) -> x
5352   if (N1C && N1C->isNullValue())
5353     return N0;
5354   // fold (shl undef, x) -> 0
5355   if (N0.isUndef())
5356     return DAG.getConstant(0, SDLoc(N), VT);
5357
5358   if (SDValue NewSel = foldBinOpIntoSelect(N))
5359     return NewSel;
5360
5361   // if (shl x, c) is known to be zero, return 0
5362   if (DAG.MaskedValueIsZero(SDValue(N, 0),
5363                             APInt::getAllOnesValue(OpSizeInBits)))
5364     return DAG.getConstant(0, SDLoc(N), VT);
5365   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5366   if (N1.getOpcode() == ISD::TRUNCATE &&
5367       N1.getOperand(0).getOpcode() == ISD::AND) {
5368     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5369       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5370   }
5371
5372   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5373     return SDValue(N, 0);
5374
5375   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5376   if (N1C && N0.getOpcode() == ISD::SHL) {
5377     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5378       SDLoc DL(N);
5379       APInt c1 = N0C1->getAPIntValue();
5380       APInt c2 = N1C->getAPIntValue();
5381       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5382
5383       APInt Sum = c1 + c2;
5384       if (Sum.uge(OpSizeInBits))
5385         return DAG.getConstant(0, DL, VT);
5386
5387       return DAG.getNode(
5388           ISD::SHL, DL, VT, N0.getOperand(0),
5389           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5390     }
5391   }
5392
5393   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5394   // For this to be valid, the second form must not preserve any of the bits
5395   // that are shifted out by the inner shift in the first form.  This means
5396   // the outer shift size must be >= the number of bits added by the ext.
5397   // As a corollary, we don't care what kind of ext it is.
5398   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5399               N0.getOpcode() == ISD::ANY_EXTEND ||
5400               N0.getOpcode() == ISD::SIGN_EXTEND) &&
5401       N0.getOperand(0).getOpcode() == ISD::SHL) {
5402     SDValue N0Op0 = N0.getOperand(0);
5403     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5404       APInt c1 = N0Op0C1->getAPIntValue();
5405       APInt c2 = N1C->getAPIntValue();
5406       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5407
5408       EVT InnerShiftVT = N0Op0.getValueType();
5409       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5410       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5411         SDLoc DL(N0);
5412         APInt Sum = c1 + c2;
5413         if (Sum.uge(OpSizeInBits))
5414           return DAG.getConstant(0, DL, VT);
5415
5416         return DAG.getNode(
5417             ISD::SHL, DL, VT,
5418             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5419             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5420       }
5421     }
5422   }
5423
5424   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5425   // Only fold this if the inner zext has no other uses to avoid increasing
5426   // the total number of instructions.
5427   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5428       N0.getOperand(0).getOpcode() == ISD::SRL) {
5429     SDValue N0Op0 = N0.getOperand(0);
5430     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5431       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5432         uint64_t c1 = N0Op0C1->getZExtValue();
5433         uint64_t c2 = N1C->getZExtValue();
5434         if (c1 == c2) {
5435           SDValue NewOp0 = N0.getOperand(0);
5436           EVT CountVT = NewOp0.getOperand(1).getValueType();
5437           SDLoc DL(N);
5438           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5439                                        NewOp0,
5440                                        DAG.getConstant(c2, DL, CountVT));
5441           AddToWorklist(NewSHL.getNode());
5442           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5443         }
5444       }
5445     }
5446   }
5447
5448   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5449   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5450   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5451       N0->getFlags().hasExact()) {
5452     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5453       uint64_t C1 = N0C1->getZExtValue();
5454       uint64_t C2 = N1C->getZExtValue();
5455       SDLoc DL(N);
5456       if (C1 <= C2)
5457         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5458                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5459       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5460                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5461     }
5462   }
5463
5464   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5465   //                               (and (srl x, (sub c1, c2), MASK)
5466   // Only fold this if the inner shift has no other uses -- if it does, folding
5467   // this will increase the total number of instructions.
5468   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5469     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5470       uint64_t c1 = N0C1->getZExtValue();
5471       if (c1 < OpSizeInBits) {
5472         uint64_t c2 = N1C->getZExtValue();
5473         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5474         SDValue Shift;
5475         if (c2 > c1) {
5476           Mask <<= c2 - c1;
5477           SDLoc DL(N);
5478           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5479                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5480         } else {
5481           Mask.lshrInPlace(c1 - c2);
5482           SDLoc DL(N);
5483           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5484                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5485         }
5486         SDLoc DL(N0);
5487         return DAG.getNode(ISD::AND, DL, VT, Shift,
5488                            DAG.getConstant(Mask, DL, VT));
5489       }
5490     }
5491   }
5492
5493   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5494   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5495       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5496     SDLoc DL(N);
5497     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5498     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5499     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5500   }
5501
5502   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5503   // Variant of version done on multiply, except mul by a power of 2 is turned
5504   // into a shift.
5505   if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
5506       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5507       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5508     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5509     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5510     AddToWorklist(Shl0.getNode());
5511     AddToWorklist(Shl1.getNode());
5512     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
5513   }
5514
5515   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5516   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5517       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5518       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5519     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5520     if (isConstantOrConstantVector(Shl))
5521       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5522   }
5523
5524   if (N1C && !N1C->isOpaque())
5525     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5526       return NewSHL;
5527
5528   return SDValue();
5529 }
5530
5531 SDValue DAGCombiner::visitSRA(SDNode *N) {
5532   SDValue N0 = N->getOperand(0);
5533   SDValue N1 = N->getOperand(1);
5534   EVT VT = N0.getValueType();
5535   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5536
5537   // Arithmetic shifting an all-sign-bit value is a no-op.
5538   // fold (sra 0, x) -> 0
5539   // fold (sra -1, x) -> -1
5540   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5541     return N0;
5542
5543   // fold vector ops
5544   if (VT.isVector())
5545     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5546       return FoldedVOp;
5547
5548   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5549
5550   // fold (sra c1, c2) -> (sra c1, c2)
5551   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5552   if (N0C && N1C && !N1C->isOpaque())
5553     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5554   // fold (sra x, c >= size(x)) -> undef
5555   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5556     return DAG.getUNDEF(VT);
5557   // fold (sra x, 0) -> x
5558   if (N1C && N1C->isNullValue())
5559     return N0;
5560
5561   if (SDValue NewSel = foldBinOpIntoSelect(N))
5562     return NewSel;
5563
5564   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5565   // sext_inreg.
5566   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5567     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5568     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5569     if (VT.isVector())
5570       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5571                                ExtVT, VT.getVectorNumElements());
5572     if ((!LegalOperations ||
5573          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5574       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5575                          N0.getOperand(0), DAG.getValueType(ExtVT));
5576   }
5577
5578   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5579   if (N1C && N0.getOpcode() == ISD::SRA) {
5580     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5581       SDLoc DL(N);
5582       APInt c1 = N0C1->getAPIntValue();
5583       APInt c2 = N1C->getAPIntValue();
5584       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5585
5586       APInt Sum = c1 + c2;
5587       if (Sum.uge(OpSizeInBits))
5588         Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
5589
5590       return DAG.getNode(
5591           ISD::SRA, DL, VT, N0.getOperand(0),
5592           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5593     }
5594   }
5595
5596   // fold (sra (shl X, m), (sub result_size, n))
5597   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5598   // result_size - n != m.
5599   // If truncate is free for the target sext(shl) is likely to result in better
5600   // code.
5601   if (N0.getOpcode() == ISD::SHL && N1C) {
5602     // Get the two constanst of the shifts, CN0 = m, CN = n.
5603     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5604     if (N01C) {
5605       LLVMContext &Ctx = *DAG.getContext();
5606       // Determine what the truncate's result bitsize and type would be.
5607       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5608
5609       if (VT.isVector())
5610         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5611
5612       // Determine the residual right-shift amount.
5613       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5614
5615       // If the shift is not a no-op (in which case this should be just a sign
5616       // extend already), the truncated to type is legal, sign_extend is legal
5617       // on that type, and the truncate to that type is both legal and free,
5618       // perform the transform.
5619       if ((ShiftAmt > 0) &&
5620           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5621           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5622           TLI.isTruncateFree(VT, TruncVT)) {
5623
5624         SDLoc DL(N);
5625         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5626             getShiftAmountTy(N0.getOperand(0).getValueType()));
5627         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5628                                     N0.getOperand(0), Amt);
5629         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5630                                     Shift);
5631         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5632                            N->getValueType(0), Trunc);
5633       }
5634     }
5635   }
5636
5637   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5638   if (N1.getOpcode() == ISD::TRUNCATE &&
5639       N1.getOperand(0).getOpcode() == ISD::AND) {
5640     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5641       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5642   }
5643
5644   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5645   //      if c1 is equal to the number of bits the trunc removes
5646   if (N0.getOpcode() == ISD::TRUNCATE &&
5647       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5648        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5649       N0.getOperand(0).hasOneUse() &&
5650       N0.getOperand(0).getOperand(1).hasOneUse() &&
5651       N1C) {
5652     SDValue N0Op0 = N0.getOperand(0);
5653     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5654       unsigned LargeShiftVal = LargeShift->getZExtValue();
5655       EVT LargeVT = N0Op0.getValueType();
5656
5657       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5658         SDLoc DL(N);
5659         SDValue Amt =
5660           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5661                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5662         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5663                                   N0Op0.getOperand(0), Amt);
5664         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5665       }
5666     }
5667   }
5668
5669   // Simplify, based on bits shifted out of the LHS.
5670   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5671     return SDValue(N, 0);
5672
5673
5674   // If the sign bit is known to be zero, switch this to a SRL.
5675   if (DAG.SignBitIsZero(N0))
5676     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5677
5678   if (N1C && !N1C->isOpaque())
5679     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5680       return NewSRA;
5681
5682   return SDValue();
5683 }
5684
5685 SDValue DAGCombiner::visitSRL(SDNode *N) {
5686   SDValue N0 = N->getOperand(0);
5687   SDValue N1 = N->getOperand(1);
5688   EVT VT = N0.getValueType();
5689   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5690
5691   // fold vector ops
5692   if (VT.isVector())
5693     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5694       return FoldedVOp;
5695
5696   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5697
5698   // fold (srl c1, c2) -> c1 >>u c2
5699   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5700   if (N0C && N1C && !N1C->isOpaque())
5701     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5702   // fold (srl 0, x) -> 0
5703   if (isNullConstantOrNullSplatConstant(N0))
5704     return N0;
5705   // fold (srl x, c >= size(x)) -> undef
5706   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5707     return DAG.getUNDEF(VT);
5708   // fold (srl x, 0) -> x
5709   if (N1C && N1C->isNullValue())
5710     return N0;
5711
5712   if (SDValue NewSel = foldBinOpIntoSelect(N))
5713     return NewSel;
5714
5715   // if (srl x, c) is known to be zero, return 0
5716   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5717                                    APInt::getAllOnesValue(OpSizeInBits)))
5718     return DAG.getConstant(0, SDLoc(N), VT);
5719
5720   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5721   if (N1C && N0.getOpcode() == ISD::SRL) {
5722     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5723       SDLoc DL(N);
5724       APInt c1 = N0C1->getAPIntValue();
5725       APInt c2 = N1C->getAPIntValue();
5726       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5727
5728       APInt Sum = c1 + c2;
5729       if (Sum.uge(OpSizeInBits))
5730         return DAG.getConstant(0, DL, VT);
5731
5732       return DAG.getNode(
5733           ISD::SRL, DL, VT, N0.getOperand(0),
5734           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5735     }
5736   }
5737
5738   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5739   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5740       N0.getOperand(0).getOpcode() == ISD::SRL) {
5741     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
5742       uint64_t c1 = N001C->getZExtValue();
5743       uint64_t c2 = N1C->getZExtValue();
5744       EVT InnerShiftVT = N0.getOperand(0).getValueType();
5745       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
5746       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5747       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5748       if (c1 + OpSizeInBits == InnerShiftSize) {
5749         SDLoc DL(N0);
5750         if (c1 + c2 >= InnerShiftSize)
5751           return DAG.getConstant(0, DL, VT);
5752         return DAG.getNode(ISD::TRUNCATE, DL, VT,
5753                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5754                                        N0.getOperand(0).getOperand(0),
5755                                        DAG.getConstant(c1 + c2, DL,
5756                                                        ShiftCountVT)));
5757       }
5758     }
5759   }
5760
5761   // fold (srl (shl x, c), c) -> (and x, cst2)
5762   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5763       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5764     SDLoc DL(N);
5765     SDValue Mask =
5766         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5767     AddToWorklist(Mask.getNode());
5768     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5769   }
5770
5771   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5772   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5773     // Shifting in all undef bits?
5774     EVT SmallVT = N0.getOperand(0).getValueType();
5775     unsigned BitSize = SmallVT.getScalarSizeInBits();
5776     if (N1C->getZExtValue() >= BitSize)
5777       return DAG.getUNDEF(VT);
5778
5779     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5780       uint64_t ShiftAmt = N1C->getZExtValue();
5781       SDLoc DL0(N0);
5782       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5783                                        N0.getOperand(0),
5784                           DAG.getConstant(ShiftAmt, DL0,
5785                                           getShiftAmountTy(SmallVT)));
5786       AddToWorklist(SmallShift.getNode());
5787       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
5788       SDLoc DL(N);
5789       return DAG.getNode(ISD::AND, DL, VT,
5790                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5791                          DAG.getConstant(Mask, DL, VT));
5792     }
5793   }
5794
5795   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5796   // bit, which is unmodified by sra.
5797   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5798     if (N0.getOpcode() == ISD::SRA)
5799       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5800   }
5801
5802   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5803   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5804       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5805     KnownBits Known;
5806     DAG.computeKnownBits(N0.getOperand(0), Known);
5807
5808     // If any of the input bits are KnownOne, then the input couldn't be all
5809     // zeros, thus the result of the srl will always be zero.
5810     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5811
5812     // If all of the bits input the to ctlz node are known to be zero, then
5813     // the result of the ctlz is "32" and the result of the shift is one.
5814     APInt UnknownBits = ~Known.Zero;
5815     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5816
5817     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5818     if (UnknownBits.isPowerOf2()) {
5819       // Okay, we know that only that the single bit specified by UnknownBits
5820       // could be set on input to the CTLZ node. If this bit is set, the SRL
5821       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5822       // to an SRL/XOR pair, which is likely to simplify more.
5823       unsigned ShAmt = UnknownBits.countTrailingZeros();
5824       SDValue Op = N0.getOperand(0);
5825
5826       if (ShAmt) {
5827         SDLoc DL(N0);
5828         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5829                   DAG.getConstant(ShAmt, DL,
5830                                   getShiftAmountTy(Op.getValueType())));
5831         AddToWorklist(Op.getNode());
5832       }
5833
5834       SDLoc DL(N);
5835       return DAG.getNode(ISD::XOR, DL, VT,
5836                          Op, DAG.getConstant(1, DL, VT));
5837     }
5838   }
5839
5840   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5841   if (N1.getOpcode() == ISD::TRUNCATE &&
5842       N1.getOperand(0).getOpcode() == ISD::AND) {
5843     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5844       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5845   }
5846
5847   // fold operands of srl based on knowledge that the low bits are not
5848   // demanded.
5849   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5850     return SDValue(N, 0);
5851
5852   if (N1C && !N1C->isOpaque())
5853     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5854       return NewSRL;
5855
5856   // Attempt to convert a srl of a load into a narrower zero-extending load.
5857   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5858     return NarrowLoad;
5859
5860   // Here is a common situation. We want to optimize:
5861   //
5862   //   %a = ...
5863   //   %b = and i32 %a, 2
5864   //   %c = srl i32 %b, 1
5865   //   brcond i32 %c ...
5866   //
5867   // into
5868   //
5869   //   %a = ...
5870   //   %b = and %a, 2
5871   //   %c = setcc eq %b, 0
5872   //   brcond %c ...
5873   //
5874   // However when after the source operand of SRL is optimized into AND, the SRL
5875   // itself may not be optimized further. Look for it and add the BRCOND into
5876   // the worklist.
5877   if (N->hasOneUse()) {
5878     SDNode *Use = *N->use_begin();
5879     if (Use->getOpcode() == ISD::BRCOND)
5880       AddToWorklist(Use);
5881     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5882       // Also look pass the truncate.
5883       Use = *Use->use_begin();
5884       if (Use->getOpcode() == ISD::BRCOND)
5885         AddToWorklist(Use);
5886     }
5887   }
5888
5889   return SDValue();
5890 }
5891
5892 SDValue DAGCombiner::visitABS(SDNode *N) {
5893   SDValue N0 = N->getOperand(0);
5894   EVT VT = N->getValueType(0);
5895
5896   // fold (abs c1) -> c2
5897   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5898     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
5899   // fold (abs (abs x)) -> (abs x)
5900   if (N0.getOpcode() == ISD::ABS)
5901     return N0;
5902   // fold (abs x) -> x iff not-negative
5903   if (DAG.SignBitIsZero(N0))
5904     return N0;
5905   return SDValue();
5906 }
5907
5908 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5909   SDValue N0 = N->getOperand(0);
5910   EVT VT = N->getValueType(0);
5911
5912   // fold (bswap c1) -> c2
5913   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5914     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5915   // fold (bswap (bswap x)) -> x
5916   if (N0.getOpcode() == ISD::BSWAP)
5917     return N0->getOperand(0);
5918   return SDValue();
5919 }
5920
5921 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5922   SDValue N0 = N->getOperand(0);
5923   EVT VT = N->getValueType(0);
5924
5925   // fold (bitreverse c1) -> c2
5926   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5927     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
5928   // fold (bitreverse (bitreverse x)) -> x
5929   if (N0.getOpcode() == ISD::BITREVERSE)
5930     return N0.getOperand(0);
5931   return SDValue();
5932 }
5933
5934 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5935   SDValue N0 = N->getOperand(0);
5936   EVT VT = N->getValueType(0);
5937
5938   // fold (ctlz c1) -> c2
5939   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5940     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5941   return SDValue();
5942 }
5943
5944 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5945   SDValue N0 = N->getOperand(0);
5946   EVT VT = N->getValueType(0);
5947
5948   // fold (ctlz_zero_undef c1) -> c2
5949   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5950     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5951   return SDValue();
5952 }
5953
5954 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5955   SDValue N0 = N->getOperand(0);
5956   EVT VT = N->getValueType(0);
5957
5958   // fold (cttz c1) -> c2
5959   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5960     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5961   return SDValue();
5962 }
5963
5964 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5965   SDValue N0 = N->getOperand(0);
5966   EVT VT = N->getValueType(0);
5967
5968   // fold (cttz_zero_undef c1) -> c2
5969   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5970     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5971   return SDValue();
5972 }
5973
5974 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5975   SDValue N0 = N->getOperand(0);
5976   EVT VT = N->getValueType(0);
5977
5978   // fold (ctpop c1) -> c2
5979   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5980     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5981   return SDValue();
5982 }
5983
5984
5985 /// \brief Generate Min/Max node
5986 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5987                                    SDValue RHS, SDValue True, SDValue False,
5988                                    ISD::CondCode CC, const TargetLowering &TLI,
5989                                    SelectionDAG &DAG) {
5990   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5991     return SDValue();
5992
5993   switch (CC) {
5994   case ISD::SETOLT:
5995   case ISD::SETOLE:
5996   case ISD::SETLT:
5997   case ISD::SETLE:
5998   case ISD::SETULT:
5999   case ISD::SETULE: {
6000     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6001     if (TLI.isOperationLegal(Opcode, VT))
6002       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6003     return SDValue();
6004   }
6005   case ISD::SETOGT:
6006   case ISD::SETOGE:
6007   case ISD::SETGT:
6008   case ISD::SETGE:
6009   case ISD::SETUGT:
6010   case ISD::SETUGE: {
6011     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6012     if (TLI.isOperationLegal(Opcode, VT))
6013       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6014     return SDValue();
6015   }
6016   default:
6017     return SDValue();
6018   }
6019 }
6020
6021 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
6022   SDValue Cond = N->getOperand(0);
6023   SDValue N1 = N->getOperand(1);
6024   SDValue N2 = N->getOperand(2);
6025   EVT VT = N->getValueType(0);
6026   EVT CondVT = Cond.getValueType();
6027   SDLoc DL(N);
6028
6029   if (!VT.isInteger())
6030     return SDValue();
6031
6032   auto *C1 = dyn_cast<ConstantSDNode>(N1);
6033   auto *C2 = dyn_cast<ConstantSDNode>(N2);
6034   if (!C1 || !C2)
6035     return SDValue();
6036
6037   // Only do this before legalization to avoid conflicting with target-specific
6038   // transforms in the other direction (create a select from a zext/sext). There
6039   // is also a target-independent combine here in DAGCombiner in the other
6040   // direction for (select Cond, -1, 0) when the condition is not i1.
6041   if (CondVT == MVT::i1 && !LegalOperations) {
6042     if (C1->isNullValue() && C2->isOne()) {
6043       // select Cond, 0, 1 --> zext (!Cond)
6044       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6045       if (VT != MVT::i1)
6046         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
6047       return NotCond;
6048     }
6049     if (C1->isNullValue() && C2->isAllOnesValue()) {
6050       // select Cond, 0, -1 --> sext (!Cond)
6051       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6052       if (VT != MVT::i1)
6053         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
6054       return NotCond;
6055     }
6056     if (C1->isOne() && C2->isNullValue()) {
6057       // select Cond, 1, 0 --> zext (Cond)
6058       if (VT != MVT::i1)
6059         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6060       return Cond;
6061     }
6062     if (C1->isAllOnesValue() && C2->isNullValue()) {
6063       // select Cond, -1, 0 --> sext (Cond)
6064       if (VT != MVT::i1)
6065         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6066       return Cond;
6067     }
6068
6069     // For any constants that differ by 1, we can transform the select into an
6070     // extend and add. Use a target hook because some targets may prefer to
6071     // transform in the other direction.
6072     if (TLI.convertSelectOfConstantsToMath()) {
6073       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
6074         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6075         if (VT != MVT::i1)
6076           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6077         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6078       }
6079       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
6080         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6081         if (VT != MVT::i1)
6082           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6083         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6084       }
6085     }
6086
6087     return SDValue();
6088   }
6089
6090   // fold (select Cond, 0, 1) -> (xor Cond, 1)
6091   // We can't do this reliably if integer based booleans have different contents
6092   // to floating point based booleans. This is because we can't tell whether we
6093   // have an integer-based boolean or a floating-point-based boolean unless we
6094   // can find the SETCC that produced it and inspect its operands. This is
6095   // fairly easy if C is the SETCC node, but it can potentially be
6096   // undiscoverable (or not reasonably discoverable). For example, it could be
6097   // in another basic block or it could require searching a complicated
6098   // expression.
6099   if (CondVT.isInteger() &&
6100       TLI.getBooleanContents(false, true) ==
6101           TargetLowering::ZeroOrOneBooleanContent &&
6102       TLI.getBooleanContents(false, false) ==
6103           TargetLowering::ZeroOrOneBooleanContent &&
6104       C1->isNullValue() && C2->isOne()) {
6105     SDValue NotCond =
6106         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
6107     if (VT.bitsEq(CondVT))
6108       return NotCond;
6109     return DAG.getZExtOrTrunc(NotCond, DL, VT);
6110   }
6111
6112   return SDValue();
6113 }
6114
6115 SDValue DAGCombiner::visitSELECT(SDNode *N) {
6116   SDValue N0 = N->getOperand(0);
6117   SDValue N1 = N->getOperand(1);
6118   SDValue N2 = N->getOperand(2);
6119   EVT VT = N->getValueType(0);
6120   EVT VT0 = N0.getValueType();
6121   SDLoc DL(N);
6122
6123   // fold (select C, X, X) -> X
6124   if (N1 == N2)
6125     return N1;
6126
6127   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
6128     // fold (select true, X, Y) -> X
6129     // fold (select false, X, Y) -> Y
6130     return !N0C->isNullValue() ? N1 : N2;
6131   }
6132
6133   // fold (select X, X, Y) -> (or X, Y)
6134   // fold (select X, 1, Y) -> (or C, Y)
6135   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
6136     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
6137
6138   if (SDValue V = foldSelectOfConstants(N))
6139     return V;
6140
6141   // fold (select C, 0, X) -> (and (not C), X)
6142   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
6143     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6144     AddToWorklist(NOTNode.getNode());
6145     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
6146   }
6147   // fold (select C, X, 1) -> (or (not C), X)
6148   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
6149     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6150     AddToWorklist(NOTNode.getNode());
6151     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
6152   }
6153   // fold (select X, Y, X) -> (and X, Y)
6154   // fold (select X, Y, 0) -> (and X, Y)
6155   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
6156     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
6157
6158   // If we can fold this based on the true/false value, do so.
6159   if (SimplifySelectOps(N, N1, N2))
6160     return SDValue(N, 0); // Don't revisit N.
6161
6162   if (VT0 == MVT::i1) {
6163     // The code in this block deals with the following 2 equivalences:
6164     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
6165     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
6166     // The target can specify its preferred form with the
6167     // shouldNormalizeToSelectSequence() callback. However we always transform
6168     // to the right anyway if we find the inner select exists in the DAG anyway
6169     // and we always transform to the left side if we know that we can further
6170     // optimize the combination of the conditions.
6171     bool normalizeToSequence =
6172         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
6173     // select (and Cond0, Cond1), X, Y
6174     //   -> select Cond0, (select Cond1, X, Y), Y
6175     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
6176       SDValue Cond0 = N0->getOperand(0);
6177       SDValue Cond1 = N0->getOperand(1);
6178       SDValue InnerSelect =
6179           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
6180       if (normalizeToSequence || !InnerSelect.use_empty())
6181         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
6182                            InnerSelect, N2);
6183     }
6184     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
6185     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
6186       SDValue Cond0 = N0->getOperand(0);
6187       SDValue Cond1 = N0->getOperand(1);
6188       SDValue InnerSelect =
6189           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
6190       if (normalizeToSequence || !InnerSelect.use_empty())
6191         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
6192                            InnerSelect);
6193     }
6194
6195     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
6196     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
6197       SDValue N1_0 = N1->getOperand(0);
6198       SDValue N1_1 = N1->getOperand(1);
6199       SDValue N1_2 = N1->getOperand(2);
6200       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
6201         // Create the actual and node if we can generate good code for it.
6202         if (!normalizeToSequence) {
6203           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
6204           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
6205         }
6206         // Otherwise see if we can optimize the "and" to a better pattern.
6207         if (SDValue Combined = visitANDLike(N0, N1_0, N))
6208           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
6209                              N2);
6210       }
6211     }
6212     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
6213     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
6214       SDValue N2_0 = N2->getOperand(0);
6215       SDValue N2_1 = N2->getOperand(1);
6216       SDValue N2_2 = N2->getOperand(2);
6217       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
6218         // Create the actual or node if we can generate good code for it.
6219         if (!normalizeToSequence) {
6220           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
6221           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
6222         }
6223         // Otherwise see if we can optimize to a better pattern.
6224         if (SDValue Combined = visitORLike(N0, N2_0, N))
6225           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
6226                              N2_2);
6227       }
6228     }
6229   }
6230
6231   // select (xor Cond, 1), X, Y -> select Cond, Y, X
6232   if (VT0 == MVT::i1) {
6233     if (N0->getOpcode() == ISD::XOR) {
6234       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
6235         SDValue Cond0 = N0->getOperand(0);
6236         if (C->isOne())
6237           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
6238       }
6239     }
6240   }
6241
6242   // fold selects based on a setcc into other things, such as min/max/abs
6243   if (N0.getOpcode() == ISD::SETCC) {
6244     // select x, y (fcmp lt x, y) -> fminnum x, y
6245     // select x, y (fcmp gt x, y) -> fmaxnum x, y
6246     //
6247     // This is OK if we don't care about what happens if either operand is a
6248     // NaN.
6249     //
6250
6251     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
6252     // no signed zeros as well as no nans.
6253     const TargetOptions &Options = DAG.getTarget().Options;
6254     if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
6255         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
6256       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6257
6258       if (SDValue FMinMax = combineMinNumMaxNum(
6259               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
6260         return FMinMax;
6261     }
6262
6263     if ((!LegalOperations &&
6264          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
6265         TLI.isOperationLegal(ISD::SELECT_CC, VT))
6266       return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
6267                          N0.getOperand(1), N1, N2, N0.getOperand(2));
6268     return SimplifySelect(DL, N0, N1, N2);
6269   }
6270
6271   return SDValue();
6272 }
6273
6274 static
6275 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6276   SDLoc DL(N);
6277   EVT LoVT, HiVT;
6278   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6279
6280   // Split the inputs.
6281   SDValue Lo, Hi, LL, LH, RL, RH;
6282   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6283   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6284
6285   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6286   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6287
6288   return std::make_pair(Lo, Hi);
6289 }
6290
6291 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6292 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6293 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6294   SDLoc DL(N);
6295   SDValue Cond = N->getOperand(0);
6296   SDValue LHS = N->getOperand(1);
6297   SDValue RHS = N->getOperand(2);
6298   EVT VT = N->getValueType(0);
6299   int NumElems = VT.getVectorNumElements();
6300   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6301          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6302          Cond.getOpcode() == ISD::BUILD_VECTOR);
6303
6304   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6305   // binary ones here.
6306   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6307     return SDValue();
6308
6309   // We're sure we have an even number of elements due to the
6310   // concat_vectors we have as arguments to vselect.
6311   // Skip BV elements until we find one that's not an UNDEF
6312   // After we find an UNDEF element, keep looping until we get to half the
6313   // length of the BV and see if all the non-undef nodes are the same.
6314   ConstantSDNode *BottomHalf = nullptr;
6315   for (int i = 0; i < NumElems / 2; ++i) {
6316     if (Cond->getOperand(i)->isUndef())
6317       continue;
6318
6319     if (BottomHalf == nullptr)
6320       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6321     else if (Cond->getOperand(i).getNode() != BottomHalf)
6322       return SDValue();
6323   }
6324
6325   // Do the same for the second half of the BuildVector
6326   ConstantSDNode *TopHalf = nullptr;
6327   for (int i = NumElems / 2; i < NumElems; ++i) {
6328     if (Cond->getOperand(i)->isUndef())
6329       continue;
6330
6331     if (TopHalf == nullptr)
6332       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6333     else if (Cond->getOperand(i).getNode() != TopHalf)
6334       return SDValue();
6335   }
6336
6337   assert(TopHalf && BottomHalf &&
6338          "One half of the selector was all UNDEFs and the other was all the "
6339          "same value. This should have been addressed before this function.");
6340   return DAG.getNode(
6341       ISD::CONCAT_VECTORS, DL, VT,
6342       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6343       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6344 }
6345
6346 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6347
6348   if (Level >= AfterLegalizeTypes)
6349     return SDValue();
6350
6351   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6352   SDValue Mask = MSC->getMask();
6353   SDValue Data  = MSC->getValue();
6354   SDLoc DL(N);
6355
6356   // If the MSCATTER data type requires splitting and the mask is provided by a
6357   // SETCC, then split both nodes and its operands before legalization. This
6358   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6359   // and enables future optimizations (e.g. min/max pattern matching on X86).
6360   if (Mask.getOpcode() != ISD::SETCC)
6361     return SDValue();
6362
6363   // Check if any splitting is required.
6364   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6365       TargetLowering::TypeSplitVector)
6366     return SDValue();
6367   SDValue MaskLo, MaskHi, Lo, Hi;
6368   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6369
6370   EVT LoVT, HiVT;
6371   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6372
6373   SDValue Chain = MSC->getChain();
6374
6375   EVT MemoryVT = MSC->getMemoryVT();
6376   unsigned Alignment = MSC->getOriginalAlignment();
6377
6378   EVT LoMemVT, HiMemVT;
6379   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6380
6381   SDValue DataLo, DataHi;
6382   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6383
6384   SDValue BasePtr = MSC->getBasePtr();
6385   SDValue IndexLo, IndexHi;
6386   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6387
6388   MachineMemOperand *MMO = DAG.getMachineFunction().
6389     getMachineMemOperand(MSC->getPointerInfo(),
6390                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6391                           Alignment, MSC->getAAInfo(), MSC->getRanges());
6392
6393   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
6394   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6395                             DL, OpsLo, MMO);
6396
6397   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
6398   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6399                             DL, OpsHi, MMO);
6400
6401   AddToWorklist(Lo.getNode());
6402   AddToWorklist(Hi.getNode());
6403
6404   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6405 }
6406
6407 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6408
6409   if (Level >= AfterLegalizeTypes)
6410     return SDValue();
6411
6412   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6413   SDValue Mask = MST->getMask();
6414   SDValue Data  = MST->getValue();
6415   EVT VT = Data.getValueType();
6416   SDLoc DL(N);
6417
6418   // If the MSTORE data type requires splitting and the mask is provided by a
6419   // SETCC, then split both nodes and its operands before legalization. This
6420   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6421   // and enables future optimizations (e.g. min/max pattern matching on X86).
6422   if (Mask.getOpcode() == ISD::SETCC) {
6423
6424     // Check if any splitting is required.
6425     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6426         TargetLowering::TypeSplitVector)
6427       return SDValue();
6428
6429     SDValue MaskLo, MaskHi, Lo, Hi;
6430     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6431
6432     SDValue Chain = MST->getChain();
6433     SDValue Ptr   = MST->getBasePtr();
6434
6435     EVT MemoryVT = MST->getMemoryVT();
6436     unsigned Alignment = MST->getOriginalAlignment();
6437
6438     // if Alignment is equal to the vector size,
6439     // take the half of it for the second part
6440     unsigned SecondHalfAlignment =
6441       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6442
6443     EVT LoMemVT, HiMemVT;
6444     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6445
6446     SDValue DataLo, DataHi;
6447     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6448
6449     MachineMemOperand *MMO = DAG.getMachineFunction().
6450       getMachineMemOperand(MST->getPointerInfo(),
6451                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6452                            Alignment, MST->getAAInfo(), MST->getRanges());
6453
6454     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6455                             MST->isTruncatingStore(),
6456                             MST->isCompressingStore());
6457
6458     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6459                                      MST->isCompressingStore());
6460
6461     MMO = DAG.getMachineFunction().
6462       getMachineMemOperand(MST->getPointerInfo(),
6463                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6464                            SecondHalfAlignment, MST->getAAInfo(),
6465                            MST->getRanges());
6466
6467     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6468                             MST->isTruncatingStore(),
6469                             MST->isCompressingStore());
6470
6471     AddToWorklist(Lo.getNode());
6472     AddToWorklist(Hi.getNode());
6473
6474     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6475   }
6476   return SDValue();
6477 }
6478
6479 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6480
6481   if (Level >= AfterLegalizeTypes)
6482     return SDValue();
6483
6484   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6485   SDValue Mask = MGT->getMask();
6486   SDLoc DL(N);
6487
6488   // If the MGATHER result requires splitting and the mask is provided by a
6489   // SETCC, then split both nodes and its operands before legalization. This
6490   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6491   // and enables future optimizations (e.g. min/max pattern matching on X86).
6492
6493   if (Mask.getOpcode() != ISD::SETCC)
6494     return SDValue();
6495
6496   EVT VT = N->getValueType(0);
6497
6498   // Check if any splitting is required.
6499   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6500       TargetLowering::TypeSplitVector)
6501     return SDValue();
6502
6503   SDValue MaskLo, MaskHi, Lo, Hi;
6504   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6505
6506   SDValue Src0 = MGT->getValue();
6507   SDValue Src0Lo, Src0Hi;
6508   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6509
6510   EVT LoVT, HiVT;
6511   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6512
6513   SDValue Chain = MGT->getChain();
6514   EVT MemoryVT = MGT->getMemoryVT();
6515   unsigned Alignment = MGT->getOriginalAlignment();
6516
6517   EVT LoMemVT, HiMemVT;
6518   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6519
6520   SDValue BasePtr = MGT->getBasePtr();
6521   SDValue Index = MGT->getIndex();
6522   SDValue IndexLo, IndexHi;
6523   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6524
6525   MachineMemOperand *MMO = DAG.getMachineFunction().
6526     getMachineMemOperand(MGT->getPointerInfo(),
6527                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6528                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6529
6530   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6531   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6532                             MMO);
6533
6534   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6535   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6536                             MMO);
6537
6538   AddToWorklist(Lo.getNode());
6539   AddToWorklist(Hi.getNode());
6540
6541   // Build a factor node to remember that this load is independent of the
6542   // other one.
6543   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6544                       Hi.getValue(1));
6545
6546   // Legalized the chain result - switch anything that used the old chain to
6547   // use the new one.
6548   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6549
6550   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6551
6552   SDValue RetOps[] = { GatherRes, Chain };
6553   return DAG.getMergeValues(RetOps, DL);
6554 }
6555
6556 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6557
6558   if (Level >= AfterLegalizeTypes)
6559     return SDValue();
6560
6561   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6562   SDValue Mask = MLD->getMask();
6563   SDLoc DL(N);
6564
6565   // If the MLOAD result requires splitting and the mask is provided by a
6566   // SETCC, then split both nodes and its operands before legalization. This
6567   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6568   // and enables future optimizations (e.g. min/max pattern matching on X86).
6569
6570   if (Mask.getOpcode() == ISD::SETCC) {
6571     EVT VT = N->getValueType(0);
6572
6573     // Check if any splitting is required.
6574     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6575         TargetLowering::TypeSplitVector)
6576       return SDValue();
6577
6578     SDValue MaskLo, MaskHi, Lo, Hi;
6579     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6580
6581     SDValue Src0 = MLD->getSrc0();
6582     SDValue Src0Lo, Src0Hi;
6583     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6584
6585     EVT LoVT, HiVT;
6586     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6587
6588     SDValue Chain = MLD->getChain();
6589     SDValue Ptr   = MLD->getBasePtr();
6590     EVT MemoryVT = MLD->getMemoryVT();
6591     unsigned Alignment = MLD->getOriginalAlignment();
6592
6593     // if Alignment is equal to the vector size,
6594     // take the half of it for the second part
6595     unsigned SecondHalfAlignment =
6596       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6597          Alignment/2 : Alignment;
6598
6599     EVT LoMemVT, HiMemVT;
6600     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6601
6602     MachineMemOperand *MMO = DAG.getMachineFunction().
6603     getMachineMemOperand(MLD->getPointerInfo(),
6604                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6605                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6606
6607     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6608                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6609
6610     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6611                                      MLD->isExpandingLoad());
6612
6613     MMO = DAG.getMachineFunction().
6614     getMachineMemOperand(MLD->getPointerInfo(),
6615                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6616                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6617
6618     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6619                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6620
6621     AddToWorklist(Lo.getNode());
6622     AddToWorklist(Hi.getNode());
6623
6624     // Build a factor node to remember that this load is independent of the
6625     // other one.
6626     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6627                         Hi.getValue(1));
6628
6629     // Legalized the chain result - switch anything that used the old chain to
6630     // use the new one.
6631     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6632
6633     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6634
6635     SDValue RetOps[] = { LoadRes, Chain };
6636     return DAG.getMergeValues(RetOps, DL);
6637   }
6638   return SDValue();
6639 }
6640
6641 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
6642   SDValue N0 = N->getOperand(0);
6643   SDValue N1 = N->getOperand(1);
6644   SDValue N2 = N->getOperand(2);
6645   SDLoc DL(N);
6646
6647   // fold (vselect C, X, X) -> X
6648   if (N1 == N2)
6649     return N1;
6650
6651   // Canonicalize integer abs.
6652   // vselect (setg[te] X,  0),  X, -X ->
6653   // vselect (setgt    X, -1),  X, -X ->
6654   // vselect (setl[te] X,  0), -X,  X ->
6655   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6656   if (N0.getOpcode() == ISD::SETCC) {
6657     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6658     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6659     bool isAbs = false;
6660     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
6661
6662     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
6663          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
6664         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
6665       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
6666     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
6667              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
6668       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6669
6670     if (isAbs) {
6671       EVT VT = LHS.getValueType();
6672       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
6673         return DAG.getNode(ISD::ABS, DL, VT, LHS);
6674
6675       SDValue Shift = DAG.getNode(
6676           ISD::SRA, DL, VT, LHS,
6677           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
6678       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
6679       AddToWorklist(Shift.getNode());
6680       AddToWorklist(Add.getNode());
6681       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
6682     }
6683   }
6684
6685   if (SimplifySelectOps(N, N1, N2))
6686     return SDValue(N, 0);  // Don't revisit N.
6687
6688   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
6689   if (ISD::isBuildVectorAllOnes(N0.getNode()))
6690     return N1;
6691   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
6692   if (ISD::isBuildVectorAllZeros(N0.getNode()))
6693     return N2;
6694
6695   // The ConvertSelectToConcatVector function is assuming both the above
6696   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
6697   // and addressed.
6698   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
6699       N2.getOpcode() == ISD::CONCAT_VECTORS &&
6700       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
6701     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
6702       return CV;
6703   }
6704
6705   return SDValue();
6706 }
6707
6708 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
6709   SDValue N0 = N->getOperand(0);
6710   SDValue N1 = N->getOperand(1);
6711   SDValue N2 = N->getOperand(2);
6712   SDValue N3 = N->getOperand(3);
6713   SDValue N4 = N->getOperand(4);
6714   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
6715
6716   // fold select_cc lhs, rhs, x, x, cc -> x
6717   if (N2 == N3)
6718     return N2;
6719
6720   // Determine if the condition we're dealing with is constant
6721   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
6722                                   CC, SDLoc(N), false)) {
6723     AddToWorklist(SCC.getNode());
6724
6725     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
6726       if (!SCCC->isNullValue())
6727         return N2;    // cond always true -> true val
6728       else
6729         return N3;    // cond always false -> false val
6730     } else if (SCC->isUndef()) {
6731       // When the condition is UNDEF, just return the first operand. This is
6732       // coherent the DAG creation, no setcc node is created in this case
6733       return N2;
6734     } else if (SCC.getOpcode() == ISD::SETCC) {
6735       // Fold to a simpler select_cc
6736       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6737                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6738                          SCC.getOperand(2));
6739     }
6740   }
6741
6742   // If we can fold this based on the true/false value, do so.
6743   if (SimplifySelectOps(N, N2, N3))
6744     return SDValue(N, 0);  // Don't revisit N.
6745
6746   // fold select_cc into other things, such as min/max/abs
6747   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6748 }
6749
6750 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6751   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6752                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6753                        SDLoc(N));
6754 }
6755
6756 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6757   SDValue LHS = N->getOperand(0);
6758   SDValue RHS = N->getOperand(1);
6759   SDValue Carry = N->getOperand(2);
6760   SDValue Cond = N->getOperand(3);
6761
6762   // If Carry is false, fold to a regular SETCC.
6763   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6764     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6765
6766   return SDValue();
6767 }
6768
6769 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
6770   SDValue LHS = N->getOperand(0);
6771   SDValue RHS = N->getOperand(1);
6772   SDValue Carry = N->getOperand(2);
6773   SDValue Cond = N->getOperand(3);
6774
6775   // If Carry is false, fold to a regular SETCC.
6776   if (isNullConstant(Carry))
6777     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6778
6779   return SDValue();
6780 }
6781
6782 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6783 /// a build_vector of constants.
6784 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6785 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6786 /// Vector extends are not folded if operations are legal; this is to
6787 /// avoid introducing illegal build_vector dag nodes.
6788 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6789                                          SelectionDAG &DAG, bool LegalTypes,
6790                                          bool LegalOperations) {
6791   unsigned Opcode = N->getOpcode();
6792   SDValue N0 = N->getOperand(0);
6793   EVT VT = N->getValueType(0);
6794
6795   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6796          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6797          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6798          && "Expected EXTEND dag node in input!");
6799
6800   // fold (sext c1) -> c1
6801   // fold (zext c1) -> c1
6802   // fold (aext c1) -> c1
6803   if (isa<ConstantSDNode>(N0))
6804     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6805
6806   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6807   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6808   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6809   EVT SVT = VT.getScalarType();
6810   if (!(VT.isVector() &&
6811       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6812       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6813     return nullptr;
6814
6815   // We can fold this node into a build_vector.
6816   unsigned VTBits = SVT.getSizeInBits();
6817   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6818   SmallVector<SDValue, 8> Elts;
6819   unsigned NumElts = VT.getVectorNumElements();
6820   SDLoc DL(N);
6821
6822   for (unsigned i=0; i != NumElts; ++i) {
6823     SDValue Op = N0->getOperand(i);
6824     if (Op->isUndef()) {
6825       Elts.push_back(DAG.getUNDEF(SVT));
6826       continue;
6827     }
6828
6829     SDLoc DL(Op);
6830     // Get the constant value and if needed trunc it to the size of the type.
6831     // Nodes like build_vector might have constants wider than the scalar type.
6832     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6833     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6834       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6835     else
6836       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6837   }
6838
6839   return DAG.getBuildVector(VT, DL, Elts).getNode();
6840 }
6841
6842 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6843 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6844 // transformation. Returns true if extension are possible and the above
6845 // mentioned transformation is profitable.
6846 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6847                                     unsigned ExtOpc,
6848                                     SmallVectorImpl<SDNode *> &ExtendNodes,
6849                                     const TargetLowering &TLI) {
6850   bool HasCopyToRegUses = false;
6851   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6852   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6853                             UE = N0.getNode()->use_end();
6854        UI != UE; ++UI) {
6855     SDNode *User = *UI;
6856     if (User == N)
6857       continue;
6858     if (UI.getUse().getResNo() != N0.getResNo())
6859       continue;
6860     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6861     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6862       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6863       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6864         // Sign bits will be lost after a zext.
6865         return false;
6866       bool Add = false;
6867       for (unsigned i = 0; i != 2; ++i) {
6868         SDValue UseOp = User->getOperand(i);
6869         if (UseOp == N0)
6870           continue;
6871         if (!isa<ConstantSDNode>(UseOp))
6872           return false;
6873         Add = true;
6874       }
6875       if (Add)
6876         ExtendNodes.push_back(User);
6877       continue;
6878     }
6879     // If truncates aren't free and there are users we can't
6880     // extend, it isn't worthwhile.
6881     if (!isTruncFree)
6882       return false;
6883     // Remember if this value is live-out.
6884     if (User->getOpcode() == ISD::CopyToReg)
6885       HasCopyToRegUses = true;
6886   }
6887
6888   if (HasCopyToRegUses) {
6889     bool BothLiveOut = false;
6890     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6891          UI != UE; ++UI) {
6892       SDUse &Use = UI.getUse();
6893       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6894         BothLiveOut = true;
6895         break;
6896       }
6897     }
6898     if (BothLiveOut)
6899       // Both unextended and extended values are live out. There had better be
6900       // a good reason for the transformation.
6901       return ExtendNodes.size();
6902   }
6903   return true;
6904 }
6905
6906 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6907                                   SDValue Trunc, SDValue ExtLoad,
6908                                   const SDLoc &DL, ISD::NodeType ExtType) {
6909   // Extend SetCC uses if necessary.
6910   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6911     SDNode *SetCC = SetCCs[i];
6912     SmallVector<SDValue, 4> Ops;
6913
6914     for (unsigned j = 0; j != 2; ++j) {
6915       SDValue SOp = SetCC->getOperand(j);
6916       if (SOp == Trunc)
6917         Ops.push_back(ExtLoad);
6918       else
6919         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6920     }
6921
6922     Ops.push_back(SetCC->getOperand(2));
6923     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6924   }
6925 }
6926
6927 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
6928 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
6929   SDValue N0 = N->getOperand(0);
6930   EVT DstVT = N->getValueType(0);
6931   EVT SrcVT = N0.getValueType();
6932
6933   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
6934           N->getOpcode() == ISD::ZERO_EXTEND) &&
6935          "Unexpected node type (not an extend)!");
6936
6937   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
6938   // For example, on a target with legal v4i32, but illegal v8i32, turn:
6939   //   (v8i32 (sext (v8i16 (load x))))
6940   // into:
6941   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
6942   //                          (v4i32 (sextload (x + 16)))))
6943   // Where uses of the original load, i.e.:
6944   //   (v8i16 (load x))
6945   // are replaced with:
6946   //   (v8i16 (truncate
6947   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
6948   //                            (v4i32 (sextload (x + 16)))))))
6949   //
6950   // This combine is only applicable to illegal, but splittable, vectors.
6951   // All legal types, and illegal non-vector types, are handled elsewhere.
6952   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
6953   //
6954   if (N0->getOpcode() != ISD::LOAD)
6955     return SDValue();
6956
6957   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6958
6959   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
6960       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
6961       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
6962     return SDValue();
6963
6964   SmallVector<SDNode *, 4> SetCCs;
6965   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
6966     return SDValue();
6967
6968   ISD::LoadExtType ExtType =
6969       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
6970
6971   // Try to split the vector types to get down to legal types.
6972   EVT SplitSrcVT = SrcVT;
6973   EVT SplitDstVT = DstVT;
6974   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
6975          SplitSrcVT.getVectorNumElements() > 1) {
6976     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
6977     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
6978   }
6979
6980   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
6981     return SDValue();
6982
6983   SDLoc DL(N);
6984   const unsigned NumSplits =
6985       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
6986   const unsigned Stride = SplitSrcVT.getStoreSize();
6987   SmallVector<SDValue, 4> Loads;
6988   SmallVector<SDValue, 4> Chains;
6989
6990   SDValue BasePtr = LN0->getBasePtr();
6991   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
6992     const unsigned Offset = Idx * Stride;
6993     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
6994
6995     SDValue SplitLoad = DAG.getExtLoad(
6996         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
6997         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
6998         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
6999
7000     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
7001                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
7002
7003     Loads.push_back(SplitLoad.getValue(0));
7004     Chains.push_back(SplitLoad.getValue(1));
7005   }
7006
7007   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
7008   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
7009
7010   // Simplify TF.
7011   AddToWorklist(NewChain.getNode());
7012
7013   CombineTo(N, NewValue);
7014
7015   // Replace uses of the original load (before extension)
7016   // with a truncate of the concatenated sextloaded vectors.
7017   SDValue Trunc =
7018       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
7019   CombineTo(N0.getNode(), Trunc, NewChain);
7020   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
7021                   (ISD::NodeType)N->getOpcode());
7022   return SDValue(N, 0); // Return N so it doesn't get rechecked!
7023 }
7024
7025 /// If we're narrowing or widening the result of a vector select and the final
7026 /// size is the same size as a setcc (compare) feeding the select, then try to
7027 /// apply the cast operation to the select's operands because matching vector
7028 /// sizes for a select condition and other operands should be more efficient.
7029 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7030   unsigned CastOpcode = Cast->getOpcode();
7031   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7032           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7033           CastOpcode == ISD::FP_ROUND) &&
7034          "Unexpected opcode for vector select narrowing/widening");
7035
7036   // We only do this transform before legal ops because the pattern may be
7037   // obfuscated by target-specific operations after legalization. Do not create
7038   // an illegal select op, however, because that may be difficult to lower.
7039   EVT VT = Cast->getValueType(0);
7040   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7041     return SDValue();
7042
7043   SDValue VSel = Cast->getOperand(0);
7044   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7045       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7046     return SDValue();
7047
7048   // Does the setcc have the same vector size as the casted select?
7049   SDValue SetCC = VSel.getOperand(0);
7050   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7051   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7052     return SDValue();
7053
7054   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7055   SDValue A = VSel.getOperand(1);
7056   SDValue B = VSel.getOperand(2);
7057   SDValue CastA, CastB;
7058   SDLoc DL(Cast);
7059   if (CastOpcode == ISD::FP_ROUND) {
7060     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7061     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7062     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7063   } else {
7064     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7065     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7066   }
7067   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7068 }
7069
7070 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7071   SDValue N0 = N->getOperand(0);
7072   EVT VT = N->getValueType(0);
7073   SDLoc DL(N);
7074
7075   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7076                                               LegalOperations))
7077     return SDValue(Res, 0);
7078
7079   // fold (sext (sext x)) -> (sext x)
7080   // fold (sext (aext x)) -> (sext x)
7081   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7082     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7083
7084   if (N0.getOpcode() == ISD::TRUNCATE) {
7085     // fold (sext (truncate (load x))) -> (sext (smaller load x))
7086     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7087     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7088       SDNode *oye = N0.getOperand(0).getNode();
7089       if (NarrowLoad.getNode() != N0.getNode()) {
7090         CombineTo(N0.getNode(), NarrowLoad);
7091         // CombineTo deleted the truncate, if needed, but not what's under it.
7092         AddToWorklist(oye);
7093       }
7094       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7095     }
7096
7097     // See if the value being truncated is already sign extended.  If so, just
7098     // eliminate the trunc/sext pair.
7099     SDValue Op = N0.getOperand(0);
7100     unsigned OpBits   = Op.getScalarValueSizeInBits();
7101     unsigned MidBits  = N0.getScalarValueSizeInBits();
7102     unsigned DestBits = VT.getScalarSizeInBits();
7103     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7104
7105     if (OpBits == DestBits) {
7106       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
7107       // bits, it is already ready.
7108       if (NumSignBits > DestBits-MidBits)
7109         return Op;
7110     } else if (OpBits < DestBits) {
7111       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
7112       // bits, just sext from i32.
7113       if (NumSignBits > OpBits-MidBits)
7114         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7115     } else {
7116       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
7117       // bits, just truncate to i32.
7118       if (NumSignBits > OpBits-MidBits)
7119         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7120     }
7121
7122     // fold (sext (truncate x)) -> (sextinreg x).
7123     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7124                                                  N0.getValueType())) {
7125       if (OpBits < DestBits)
7126         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7127       else if (OpBits > DestBits)
7128         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7129       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7130                          DAG.getValueType(N0.getValueType()));
7131     }
7132   }
7133
7134   // fold (sext (load x)) -> (sext (truncate (sextload x)))
7135   // Only generate vector extloads when 1) they're legal, and 2) they are
7136   // deemed desirable by the target.
7137   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7138       ((!LegalOperations && !VT.isVector() &&
7139         !cast<LoadSDNode>(N0)->isVolatile()) ||
7140        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
7141     bool DoXform = true;
7142     SmallVector<SDNode*, 4> SetCCs;
7143     if (!N0.hasOneUse())
7144       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
7145     if (VT.isVector())
7146       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7147     if (DoXform) {
7148       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7149       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7150                                        LN0->getBasePtr(), N0.getValueType(),
7151                                        LN0->getMemOperand());
7152       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7153                                   N0.getValueType(), ExtLoad);
7154       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7155       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7156       return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
7157     }
7158   }
7159
7160   // fold (sext (load x)) to multiple smaller sextloads.
7161   // Only on illegal but splittable vectors.
7162   if (SDValue ExtLoad = CombineExtLoad(N))
7163     return ExtLoad;
7164
7165   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7166   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7167   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7168       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7169     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7170     EVT MemVT = LN0->getMemoryVT();
7171     if ((!LegalOperations && !LN0->isVolatile()) ||
7172         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
7173       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7174                                        LN0->getBasePtr(), MemVT,
7175                                        LN0->getMemOperand());
7176       CombineTo(N, ExtLoad);
7177       CombineTo(N0.getNode(),
7178                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7179                             N0.getValueType(), ExtLoad),
7180                 ExtLoad.getValue(1));
7181       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7182     }
7183   }
7184
7185   // fold (sext (and/or/xor (load x), cst)) ->
7186   //      (and/or/xor (sextload x), (sext cst))
7187   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7188        N0.getOpcode() == ISD::XOR) &&
7189       isa<LoadSDNode>(N0.getOperand(0)) &&
7190       N0.getOperand(1).getOpcode() == ISD::Constant &&
7191       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
7192       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7193     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7194     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7195       bool DoXform = true;
7196       SmallVector<SDNode*, 4> SetCCs;
7197       if (!N0.hasOneUse())
7198         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7199                                           SetCCs, TLI);
7200       if (DoXform) {
7201         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7202                                          LN0->getChain(), LN0->getBasePtr(),
7203                                          LN0->getMemoryVT(),
7204                                          LN0->getMemOperand());
7205         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7206         Mask = Mask.sext(VT.getSizeInBits());
7207         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7208                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7209         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7210                                     SDLoc(N0.getOperand(0)),
7211                                     N0.getOperand(0).getValueType(), ExtLoad);
7212         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7213         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7214         return CombineTo(N, And); // Return N so it doesn't get rechecked!
7215       }
7216     }
7217   }
7218
7219   if (N0.getOpcode() == ISD::SETCC) {
7220     SDValue N00 = N0.getOperand(0);
7221     SDValue N01 = N0.getOperand(1);
7222     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7223     EVT N00VT = N0.getOperand(0).getValueType();
7224
7225     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7226     // Only do this before legalize for now.
7227     if (VT.isVector() && !LegalOperations &&
7228         TLI.getBooleanContents(N00VT) ==
7229             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7230       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7231       // of the same size as the compared operands. Only optimize sext(setcc())
7232       // if this is the case.
7233       EVT SVT = getSetCCResultType(N00VT);
7234
7235       // We know that the # elements of the results is the same as the
7236       // # elements of the compare (and the # elements of the compare result
7237       // for that matter).  Check to see that they are the same size.  If so,
7238       // we know that the element size of the sext'd result matches the
7239       // element size of the compare operands.
7240       if (VT.getSizeInBits() == SVT.getSizeInBits())
7241         return DAG.getSetCC(DL, VT, N00, N01, CC);
7242
7243       // If the desired elements are smaller or larger than the source
7244       // elements, we can use a matching integer vector type and then
7245       // truncate/sign extend.
7246       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7247       if (SVT == MatchingVecType) {
7248         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7249         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7250       }
7251     }
7252
7253     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7254     // Here, T can be 1 or -1, depending on the type of the setcc and
7255     // getBooleanContents().
7256     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7257
7258     // To determine the "true" side of the select, we need to know the high bit
7259     // of the value returned by the setcc if it evaluates to true.
7260     // If the type of the setcc is i1, then the true case of the select is just
7261     // sext(i1 1), that is, -1.
7262     // If the type of the setcc is larger (say, i8) then the value of the high
7263     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7264     // of the appropriate width.
7265     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7266                                            : TLI.getConstTrueVal(DAG, VT, DL);
7267     SDValue Zero = DAG.getConstant(0, DL, VT);
7268     if (SDValue SCC =
7269             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7270       return SCC;
7271
7272     if (!VT.isVector()) {
7273       EVT SetCCVT = getSetCCResultType(N00VT);
7274       // Don't do this transform for i1 because there's a select transform
7275       // that would reverse it.
7276       // TODO: We should not do this transform at all without a target hook
7277       // because a sext is likely cheaper than a select?
7278       if (SetCCVT.getScalarSizeInBits() != 1 &&
7279           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7280         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7281         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7282       }
7283     }
7284   }
7285
7286   // fold (sext x) -> (zext x) if the sign bit is known zero.
7287   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7288       DAG.SignBitIsZero(N0))
7289     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7290
7291   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7292     return NewVSel;
7293
7294   return SDValue();
7295 }
7296
7297 // isTruncateOf - If N is a truncate of some other value, return true, record
7298 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7299 // This function computes KnownBits to avoid a duplicated call to
7300 // computeKnownBits in the caller.
7301 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7302                          KnownBits &Known) {
7303   if (N->getOpcode() == ISD::TRUNCATE) {
7304     Op = N->getOperand(0);
7305     DAG.computeKnownBits(Op, Known);
7306     return true;
7307   }
7308
7309   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7310       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7311     return false;
7312
7313   SDValue Op0 = N->getOperand(0);
7314   SDValue Op1 = N->getOperand(1);
7315   assert(Op0.getValueType() == Op1.getValueType());
7316
7317   if (isNullConstant(Op0))
7318     Op = Op1;
7319   else if (isNullConstant(Op1))
7320     Op = Op0;
7321   else
7322     return false;
7323
7324   DAG.computeKnownBits(Op, Known);
7325
7326   if (!(Known.Zero | 1).isAllOnesValue())
7327     return false;
7328
7329   return true;
7330 }
7331
7332 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7333   SDValue N0 = N->getOperand(0);
7334   EVT VT = N->getValueType(0);
7335
7336   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7337                                               LegalOperations))
7338     return SDValue(Res, 0);
7339
7340   // fold (zext (zext x)) -> (zext x)
7341   // fold (zext (aext x)) -> (zext x)
7342   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7343     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7344                        N0.getOperand(0));
7345
7346   // fold (zext (truncate x)) -> (zext x) or
7347   //      (zext (truncate x)) -> (truncate x)
7348   // This is valid when the truncated bits of x are already zero.
7349   // FIXME: We should extend this to work for vectors too.
7350   SDValue Op;
7351   KnownBits Known;
7352   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
7353     APInt TruncatedBits =
7354       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7355       APInt(Op.getValueSizeInBits(), 0) :
7356       APInt::getBitsSet(Op.getValueSizeInBits(),
7357                         N0.getValueSizeInBits(),
7358                         std::min(Op.getValueSizeInBits(),
7359                                  VT.getSizeInBits()));
7360     if (TruncatedBits.isSubsetOf(Known.Zero))
7361       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7362   }
7363
7364   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7365   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7366   if (N0.getOpcode() == ISD::TRUNCATE) {
7367     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7368       SDNode *oye = N0.getOperand(0).getNode();
7369       if (NarrowLoad.getNode() != N0.getNode()) {
7370         CombineTo(N0.getNode(), NarrowLoad);
7371         // CombineTo deleted the truncate, if needed, but not what's under it.
7372         AddToWorklist(oye);
7373       }
7374       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7375     }
7376   }
7377
7378   // fold (zext (truncate x)) -> (and x, mask)
7379   if (N0.getOpcode() == ISD::TRUNCATE) {
7380     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7381     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7382     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7383       SDNode *oye = N0.getOperand(0).getNode();
7384       if (NarrowLoad.getNode() != N0.getNode()) {
7385         CombineTo(N0.getNode(), NarrowLoad);
7386         // CombineTo deleted the truncate, if needed, but not what's under it.
7387         AddToWorklist(oye);
7388       }
7389       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7390     }
7391
7392     EVT SrcVT = N0.getOperand(0).getValueType();
7393     EVT MinVT = N0.getValueType();
7394
7395     // Try to mask before the extension to avoid having to generate a larger mask,
7396     // possibly over several sub-vectors.
7397     if (SrcVT.bitsLT(VT)) {
7398       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7399                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7400         SDValue Op = N0.getOperand(0);
7401         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7402         AddToWorklist(Op.getNode());
7403         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7404       }
7405     }
7406
7407     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7408       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7409       AddToWorklist(Op.getNode());
7410       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7411     }
7412   }
7413
7414   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7415   // if either of the casts is not free.
7416   if (N0.getOpcode() == ISD::AND &&
7417       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7418       N0.getOperand(1).getOpcode() == ISD::Constant &&
7419       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7420                            N0.getValueType()) ||
7421        !TLI.isZExtFree(N0.getValueType(), VT))) {
7422     SDValue X = N0.getOperand(0).getOperand(0);
7423     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7424     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7425     Mask = Mask.zext(VT.getSizeInBits());
7426     SDLoc DL(N);
7427     return DAG.getNode(ISD::AND, DL, VT,
7428                        X, DAG.getConstant(Mask, DL, VT));
7429   }
7430
7431   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7432   // Only generate vector extloads when 1) they're legal, and 2) they are
7433   // deemed desirable by the target.
7434   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7435       ((!LegalOperations && !VT.isVector() &&
7436         !cast<LoadSDNode>(N0)->isVolatile()) ||
7437        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7438     bool DoXform = true;
7439     SmallVector<SDNode*, 4> SetCCs;
7440     if (!N0.hasOneUse())
7441       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7442     if (VT.isVector())
7443       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7444     if (DoXform) {
7445       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7446       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7447                                        LN0->getChain(),
7448                                        LN0->getBasePtr(), N0.getValueType(),
7449                                        LN0->getMemOperand());
7450
7451       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7452                                   N0.getValueType(), ExtLoad);
7453       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7454       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7455       return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
7456     }
7457   }
7458
7459   // fold (zext (load x)) to multiple smaller zextloads.
7460   // Only on illegal but splittable vectors.
7461   if (SDValue ExtLoad = CombineExtLoad(N))
7462     return ExtLoad;
7463
7464   // fold (zext (and/or/xor (load x), cst)) ->
7465   //      (and/or/xor (zextload x), (zext cst))
7466   // Unless (and (load x) cst) will match as a zextload already and has
7467   // additional users.
7468   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7469        N0.getOpcode() == ISD::XOR) &&
7470       isa<LoadSDNode>(N0.getOperand(0)) &&
7471       N0.getOperand(1).getOpcode() == ISD::Constant &&
7472       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7473       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7474     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7475     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7476       bool DoXform = true;
7477       SmallVector<SDNode*, 4> SetCCs;
7478       if (!N0.hasOneUse()) {
7479         if (N0.getOpcode() == ISD::AND) {
7480           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7481           auto NarrowLoad = false;
7482           EVT LoadResultTy = AndC->getValueType(0);
7483           EVT ExtVT, LoadedVT;
7484           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7485                                NarrowLoad))
7486             DoXform = false;
7487         }
7488         if (DoXform)
7489           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7490                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7491       }
7492       if (DoXform) {
7493         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7494                                          LN0->getChain(), LN0->getBasePtr(),
7495                                          LN0->getMemoryVT(),
7496                                          LN0->getMemOperand());
7497         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7498         Mask = Mask.zext(VT.getSizeInBits());
7499         SDLoc DL(N);
7500         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7501                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7502         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7503                                     SDLoc(N0.getOperand(0)),
7504                                     N0.getOperand(0).getValueType(), ExtLoad);
7505         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
7506         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7507         return CombineTo(N, And); // Return N so it doesn't get rechecked!
7508       }
7509     }
7510   }
7511
7512   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7513   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7514   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7515       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7516     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7517     EVT MemVT = LN0->getMemoryVT();
7518     if ((!LegalOperations && !LN0->isVolatile()) ||
7519         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7520       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7521                                        LN0->getChain(),
7522                                        LN0->getBasePtr(), MemVT,
7523                                        LN0->getMemOperand());
7524       CombineTo(N, ExtLoad);
7525       CombineTo(N0.getNode(),
7526                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7527                             ExtLoad),
7528                 ExtLoad.getValue(1));
7529       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7530     }
7531   }
7532
7533   if (N0.getOpcode() == ISD::SETCC) {
7534     // Only do this before legalize for now.
7535     if (!LegalOperations && VT.isVector() &&
7536         N0.getValueType().getVectorElementType() == MVT::i1) {
7537       EVT N00VT = N0.getOperand(0).getValueType();
7538       if (getSetCCResultType(N00VT) == N0.getValueType())
7539         return SDValue();
7540
7541       // We know that the # elements of the results is the same as the #
7542       // elements of the compare (and the # elements of the compare result for
7543       // that matter). Check to see that they are the same size. If so, we know
7544       // that the element size of the sext'd result matches the element size of
7545       // the compare operands.
7546       SDLoc DL(N);
7547       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7548       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7549         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7550         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7551                                      N0.getOperand(1), N0.getOperand(2));
7552         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7553       }
7554
7555       // If the desired elements are smaller or larger than the source
7556       // elements we can use a matching integer vector type and then
7557       // truncate/sign extend.
7558       EVT MatchingElementType = EVT::getIntegerVT(
7559           *DAG.getContext(), N00VT.getScalarSizeInBits());
7560       EVT MatchingVectorType = EVT::getVectorVT(
7561           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7562       SDValue VsetCC =
7563           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7564                       N0.getOperand(1), N0.getOperand(2));
7565       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7566                          VecOnes);
7567     }
7568
7569     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7570     SDLoc DL(N);
7571     if (SDValue SCC = SimplifySelectCC(
7572             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7573             DAG.getConstant(0, DL, VT),
7574             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7575       return SCC;
7576   }
7577
7578   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7579   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7580       isa<ConstantSDNode>(N0.getOperand(1)) &&
7581       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7582       N0.hasOneUse()) {
7583     SDValue ShAmt = N0.getOperand(1);
7584     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7585     if (N0.getOpcode() == ISD::SHL) {
7586       SDValue InnerZExt = N0.getOperand(0);
7587       // If the original shl may be shifting out bits, do not perform this
7588       // transformation.
7589       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7590         InnerZExt.getOperand(0).getValueSizeInBits();
7591       if (ShAmtVal > KnownZeroBits)
7592         return SDValue();
7593     }
7594
7595     SDLoc DL(N);
7596
7597     // Ensure that the shift amount is wide enough for the shifted value.
7598     if (VT.getSizeInBits() >= 256)
7599       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7600
7601     return DAG.getNode(N0.getOpcode(), DL, VT,
7602                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7603                        ShAmt);
7604   }
7605
7606   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7607     return NewVSel;
7608
7609   return SDValue();
7610 }
7611
7612 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
7613   SDValue N0 = N->getOperand(0);
7614   EVT VT = N->getValueType(0);
7615
7616   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7617                                               LegalOperations))
7618     return SDValue(Res, 0);
7619
7620   // fold (aext (aext x)) -> (aext x)
7621   // fold (aext (zext x)) -> (zext x)
7622   // fold (aext (sext x)) -> (sext x)
7623   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
7624       N0.getOpcode() == ISD::ZERO_EXTEND ||
7625       N0.getOpcode() == ISD::SIGN_EXTEND)
7626     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7627
7628   // fold (aext (truncate (load x))) -> (aext (smaller load x))
7629   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
7630   if (N0.getOpcode() == ISD::TRUNCATE) {
7631     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7632       SDNode *oye = N0.getOperand(0).getNode();
7633       if (NarrowLoad.getNode() != N0.getNode()) {
7634         CombineTo(N0.getNode(), NarrowLoad);
7635         // CombineTo deleted the truncate, if needed, but not what's under it.
7636         AddToWorklist(oye);
7637       }
7638       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7639     }
7640   }
7641
7642   // fold (aext (truncate x))
7643   if (N0.getOpcode() == ISD::TRUNCATE)
7644     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7645
7646   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
7647   // if the trunc is not free.
7648   if (N0.getOpcode() == ISD::AND &&
7649       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7650       N0.getOperand(1).getOpcode() == ISD::Constant &&
7651       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7652                           N0.getValueType())) {
7653     SDLoc DL(N);
7654     SDValue X = N0.getOperand(0).getOperand(0);
7655     X = DAG.getAnyExtOrTrunc(X, DL, VT);
7656     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7657     Mask = Mask.zext(VT.getSizeInBits());
7658     return DAG.getNode(ISD::AND, DL, VT,
7659                        X, DAG.getConstant(Mask, DL, VT));
7660   }
7661
7662   // fold (aext (load x)) -> (aext (truncate (extload x)))
7663   // None of the supported targets knows how to perform load and any_ext
7664   // on vectors in one instruction.  We only perform this transformation on
7665   // scalars.
7666   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
7667       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7668       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
7669     bool DoXform = true;
7670     SmallVector<SDNode*, 4> SetCCs;
7671     if (!N0.hasOneUse())
7672       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
7673     if (DoXform) {
7674       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7675       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
7676                                        LN0->getChain(),
7677                                        LN0->getBasePtr(), N0.getValueType(),
7678                                        LN0->getMemOperand());
7679       CombineTo(N, ExtLoad);
7680       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7681                                   N0.getValueType(), ExtLoad);
7682       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7683       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7684                       ISD::ANY_EXTEND);
7685       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7686     }
7687   }
7688
7689   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
7690   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
7691   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
7692   if (N0.getOpcode() == ISD::LOAD &&
7693       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7694       N0.hasOneUse()) {
7695     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7696     ISD::LoadExtType ExtType = LN0->getExtensionType();
7697     EVT MemVT = LN0->getMemoryVT();
7698     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
7699       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
7700                                        VT, LN0->getChain(), LN0->getBasePtr(),
7701                                        MemVT, LN0->getMemOperand());
7702       CombineTo(N, ExtLoad);
7703       CombineTo(N0.getNode(),
7704                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7705                             N0.getValueType(), ExtLoad),
7706                 ExtLoad.getValue(1));
7707       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7708     }
7709   }
7710
7711   if (N0.getOpcode() == ISD::SETCC) {
7712     // For vectors:
7713     // aext(setcc) -> vsetcc
7714     // aext(setcc) -> truncate(vsetcc)
7715     // aext(setcc) -> aext(vsetcc)
7716     // Only do this before legalize for now.
7717     if (VT.isVector() && !LegalOperations) {
7718       EVT N0VT = N0.getOperand(0).getValueType();
7719         // We know that the # elements of the results is the same as the
7720         // # elements of the compare (and the # elements of the compare result
7721         // for that matter).  Check to see that they are the same size.  If so,
7722         // we know that the element size of the sext'd result matches the
7723         // element size of the compare operands.
7724       if (VT.getSizeInBits() == N0VT.getSizeInBits())
7725         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
7726                              N0.getOperand(1),
7727                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
7728       // If the desired elements are smaller or larger than the source
7729       // elements we can use a matching integer vector type and then
7730       // truncate/any extend
7731       else {
7732         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
7733         SDValue VsetCC =
7734           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
7735                         N0.getOperand(1),
7736                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
7737         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
7738       }
7739     }
7740
7741     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7742     SDLoc DL(N);
7743     if (SDValue SCC = SimplifySelectCC(
7744             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7745             DAG.getConstant(0, DL, VT),
7746             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7747       return SCC;
7748   }
7749
7750   return SDValue();
7751 }
7752
7753 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7754   SDValue N0 = N->getOperand(0);
7755   SDValue N1 = N->getOperand(1);
7756   EVT EVT = cast<VTSDNode>(N1)->getVT();
7757
7758   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7759   if (N0.getOpcode() == ISD::AssertZext &&
7760       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7761     return N0;
7762
7763   return SDValue();
7764 }
7765
7766 /// See if the specified operand can be simplified with the knowledge that only
7767 /// the bits specified by Mask are used.  If so, return the simpler operand,
7768 /// otherwise return a null SDValue.
7769 ///
7770 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
7771 /// simplify nodes with multiple uses more aggressively.)
7772 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
7773   switch (V.getOpcode()) {
7774   default: break;
7775   case ISD::Constant: {
7776     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
7777     assert(CV && "Const value should be ConstSDNode.");
7778     const APInt &CVal = CV->getAPIntValue();
7779     APInt NewVal = CVal & Mask;
7780     if (NewVal != CVal)
7781       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
7782     break;
7783   }
7784   case ISD::OR:
7785   case ISD::XOR:
7786     // If the LHS or RHS don't contribute bits to the or, drop them.
7787     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
7788       return V.getOperand(1);
7789     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
7790       return V.getOperand(0);
7791     break;
7792   case ISD::SRL:
7793     // Only look at single-use SRLs.
7794     if (!V.getNode()->hasOneUse())
7795       break;
7796     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
7797       // See if we can recursively simplify the LHS.
7798       unsigned Amt = RHSC->getZExtValue();
7799
7800       // Watch out for shift count overflow though.
7801       if (Amt >= Mask.getBitWidth()) break;
7802       APInt NewMask = Mask << Amt;
7803       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
7804         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
7805                            SimplifyLHS, V.getOperand(1));
7806     }
7807     break;
7808   case ISD::AND: {
7809     // X & -1 -> X (ignoring bits which aren't demanded).
7810     ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
7811     if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
7812       return V.getOperand(0);
7813     break;
7814   }
7815   }
7816   return SDValue();
7817 }
7818
7819 /// If the result of a wider load is shifted to right of N  bits and then
7820 /// truncated to a narrower type and where N is a multiple of number of bits of
7821 /// the narrower type, transform it to a narrower load from address + N / num of
7822 /// bits of new type. If the result is to be extended, also fold the extension
7823 /// to form a extending load.
7824 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7825   unsigned Opc = N->getOpcode();
7826
7827   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7828   SDValue N0 = N->getOperand(0);
7829   EVT VT = N->getValueType(0);
7830   EVT ExtVT = VT;
7831
7832   // This transformation isn't valid for vector loads.
7833   if (VT.isVector())
7834     return SDValue();
7835
7836   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
7837   // extended to VT.
7838   if (Opc == ISD::SIGN_EXTEND_INREG) {
7839     ExtType = ISD::SEXTLOAD;
7840     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7841   } else if (Opc == ISD::SRL) {
7842     // Another special-case: SRL is basically zero-extending a narrower value.
7843     ExtType = ISD::ZEXTLOAD;
7844     N0 = SDValue(N, 0);
7845     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7846     if (!N01) return SDValue();
7847     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
7848                               VT.getSizeInBits() - N01->getZExtValue());
7849   }
7850   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
7851     return SDValue();
7852
7853   unsigned EVTBits = ExtVT.getSizeInBits();
7854
7855   // Do not generate loads of non-round integer types since these can
7856   // be expensive (and would be wrong if the type is not byte sized).
7857   if (!ExtVT.isRound())
7858     return SDValue();
7859
7860   unsigned ShAmt = 0;
7861   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
7862     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7863       ShAmt = N01->getZExtValue();
7864       // Is the shift amount a multiple of size of VT?
7865       if ((ShAmt & (EVTBits-1)) == 0) {
7866         N0 = N0.getOperand(0);
7867         // Is the load width a multiple of size of VT?
7868         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
7869           return SDValue();
7870       }
7871
7872       // At this point, we must have a load or else we can't do the transform.
7873       if (!isa<LoadSDNode>(N0)) return SDValue();
7874
7875       // Because a SRL must be assumed to *need* to zero-extend the high bits
7876       // (as opposed to anyext the high bits), we can't combine the zextload
7877       // lowering of SRL and an sextload.
7878       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
7879         return SDValue();
7880
7881       // If the shift amount is larger than the input type then we're not
7882       // accessing any of the loaded bytes.  If the load was a zextload/extload
7883       // then the result of the shift+trunc is zero/undef (handled elsewhere).
7884       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
7885         return SDValue();
7886     }
7887   }
7888
7889   // If the load is shifted left (and the result isn't shifted back right),
7890   // we can fold the truncate through the shift.
7891   unsigned ShLeftAmt = 0;
7892   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7893       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
7894     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7895       ShLeftAmt = N01->getZExtValue();
7896       N0 = N0.getOperand(0);
7897     }
7898   }
7899
7900   // If we haven't found a load, we can't narrow it.  Don't transform one with
7901   // multiple uses, this would require adding a new load.
7902   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7903     return SDValue();
7904
7905   // Don't change the width of a volatile load.
7906   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7907   if (LN0->isVolatile())
7908     return SDValue();
7909
7910   // Verify that we are actually reducing a load width here.
7911   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7912     return SDValue();
7913
7914   // For the transform to be legal, the load must produce only two values
7915   // (the value loaded and the chain).  Don't transform a pre-increment
7916   // load, for example, which produces an extra value.  Otherwise the
7917   // transformation is not equivalent, and the downstream logic to replace
7918   // uses gets things wrong.
7919   if (LN0->getNumValues() > 2)
7920     return SDValue();
7921
7922   // If the load that we're shrinking is an extload and we're not just
7923   // discarding the extension we can't simply shrink the load. Bail.
7924   // TODO: It would be possible to merge the extensions in some cases.
7925   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
7926       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
7927     return SDValue();
7928
7929   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
7930     return SDValue();
7931
7932   EVT PtrType = N0.getOperand(1).getValueType();
7933
7934   if (PtrType == MVT::Untyped || PtrType.isExtended())
7935     // It's not possible to generate a constant of extended or untyped type.
7936     return SDValue();
7937
7938   // For big endian targets, we need to adjust the offset to the pointer to
7939   // load the correct bytes.
7940   if (DAG.getDataLayout().isBigEndian()) {
7941     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
7942     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
7943     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
7944   }
7945
7946   uint64_t PtrOff = ShAmt / 8;
7947   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
7948   SDLoc DL(LN0);
7949   // The original load itself didn't wrap, so an offset within it doesn't.
7950   SDNodeFlags Flags;
7951   Flags.setNoUnsignedWrap(true);
7952   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
7953                                PtrType, LN0->getBasePtr(),
7954                                DAG.getConstant(PtrOff, DL, PtrType),
7955                                Flags);
7956   AddToWorklist(NewPtr.getNode());
7957
7958   SDValue Load;
7959   if (ExtType == ISD::NON_EXTLOAD)
7960     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
7961                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
7962                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7963   else
7964     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
7965                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
7966                           NewAlign, LN0->getMemOperand()->getFlags(),
7967                           LN0->getAAInfo());
7968
7969   // Replace the old load's chain with the new load's chain.
7970   WorklistRemover DeadNodes(*this);
7971   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7972
7973   // Shift the result left, if we've swallowed a left shift.
7974   SDValue Result = Load;
7975   if (ShLeftAmt != 0) {
7976     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
7977     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
7978       ShImmTy = VT;
7979     // If the shift amount is as large as the result size (but, presumably,
7980     // no larger than the source) then the useful bits of the result are
7981     // zero; we can't simply return the shortened shift, because the result
7982     // of that operation is undefined.
7983     SDLoc DL(N0);
7984     if (ShLeftAmt >= VT.getSizeInBits())
7985       Result = DAG.getConstant(0, DL, VT);
7986     else
7987       Result = DAG.getNode(ISD::SHL, DL, VT,
7988                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
7989   }
7990
7991   // Return the new loaded value.
7992   return Result;
7993 }
7994
7995 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
7996   SDValue N0 = N->getOperand(0);
7997   SDValue N1 = N->getOperand(1);
7998   EVT VT = N->getValueType(0);
7999   EVT EVT = cast<VTSDNode>(N1)->getVT();
8000   unsigned VTBits = VT.getScalarSizeInBits();
8001   unsigned EVTBits = EVT.getScalarSizeInBits();
8002
8003   if (N0.isUndef())
8004     return DAG.getUNDEF(VT);
8005
8006   // fold (sext_in_reg c1) -> c1
8007   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8008     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8009
8010   // If the input is already sign extended, just drop the extension.
8011   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8012     return N0;
8013
8014   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8015   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8016       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8017     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8018                        N0.getOperand(0), N1);
8019
8020   // fold (sext_in_reg (sext x)) -> (sext x)
8021   // fold (sext_in_reg (aext x)) -> (sext x)
8022   // if x is small enough.
8023   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8024     SDValue N00 = N0.getOperand(0);
8025     if (N00.getScalarValueSizeInBits() <= EVTBits &&
8026         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8027       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8028   }
8029
8030   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8031   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8032        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8033        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8034       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8035     if (!LegalOperations ||
8036         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8037       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8038   }
8039
8040   // fold (sext_in_reg (zext x)) -> (sext x)
8041   // iff we are extending the source sign bit.
8042   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8043     SDValue N00 = N0.getOperand(0);
8044     if (N00.getScalarValueSizeInBits() == EVTBits &&
8045         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8046       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8047   }
8048
8049   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8050   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8051     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8052
8053   // fold operands of sext_in_reg based on knowledge that the top bits are not
8054   // demanded.
8055   if (SimplifyDemandedBits(SDValue(N, 0)))
8056     return SDValue(N, 0);
8057
8058   // fold (sext_in_reg (load x)) -> (smaller sextload x)
8059   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8060   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8061     return NarrowLoad;
8062
8063   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8064   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8065   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8066   if (N0.getOpcode() == ISD::SRL) {
8067     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8068       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8069         // We can turn this into an SRA iff the input to the SRL is already sign
8070         // extended enough.
8071         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8072         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8073           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8074                              N0.getOperand(0), N0.getOperand(1));
8075       }
8076   }
8077
8078   // fold (sext_inreg (extload x)) -> (sextload x)
8079   if (ISD::isEXTLoad(N0.getNode()) &&
8080       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8081       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8082       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8083        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8084     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8085     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8086                                      LN0->getChain(),
8087                                      LN0->getBasePtr(), EVT,
8088                                      LN0->getMemOperand());
8089     CombineTo(N, ExtLoad);
8090     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8091     AddToWorklist(ExtLoad.getNode());
8092     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8093   }
8094   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8095   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8096       N0.hasOneUse() &&
8097       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8098       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8099        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8100     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8101     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8102                                      LN0->getChain(),
8103                                      LN0->getBasePtr(), EVT,
8104                                      LN0->getMemOperand());
8105     CombineTo(N, ExtLoad);
8106     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8107     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8108   }
8109
8110   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8111   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8112     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8113                                            N0.getOperand(1), false))
8114       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8115                          BSwap, N1);
8116   }
8117
8118   return SDValue();
8119 }
8120
8121 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8122   SDValue N0 = N->getOperand(0);
8123   EVT VT = N->getValueType(0);
8124
8125   if (N0.isUndef())
8126     return DAG.getUNDEF(VT);
8127
8128   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8129                                               LegalOperations))
8130     return SDValue(Res, 0);
8131
8132   return SDValue();
8133 }
8134
8135 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8136   SDValue N0 = N->getOperand(0);
8137   EVT VT = N->getValueType(0);
8138
8139   if (N0.isUndef())
8140     return DAG.getUNDEF(VT);
8141
8142   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8143                                               LegalOperations))
8144     return SDValue(Res, 0);
8145
8146   return SDValue();
8147 }
8148
8149 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
8150   SDValue N0 = N->getOperand(0);
8151   EVT VT = N->getValueType(0);
8152   bool isLE = DAG.getDataLayout().isLittleEndian();
8153
8154   // noop truncate
8155   if (N0.getValueType() == N->getValueType(0))
8156     return N0;
8157   // fold (truncate c1) -> c1
8158   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8159     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8160   // fold (truncate (truncate x)) -> (truncate x)
8161   if (N0.getOpcode() == ISD::TRUNCATE)
8162     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8163   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8164   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
8165       N0.getOpcode() == ISD::SIGN_EXTEND ||
8166       N0.getOpcode() == ISD::ANY_EXTEND) {
8167     // if the source is smaller than the dest, we still need an extend.
8168     if (N0.getOperand(0).getValueType().bitsLT(VT))
8169       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8170     // if the source is larger than the dest, than we just need the truncate.
8171     if (N0.getOperand(0).getValueType().bitsGT(VT))
8172       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8173     // if the source and dest are the same type, we can drop both the extend
8174     // and the truncate.
8175     return N0.getOperand(0);
8176   }
8177
8178   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8179   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8180     return SDValue();
8181
8182   // Fold extract-and-trunc into a narrow extract. For example:
8183   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8184   //   i32 y = TRUNCATE(i64 x)
8185   //        -- becomes --
8186   //   v16i8 b = BITCAST (v2i64 val)
8187   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8188   //
8189   // Note: We only run this optimization after type legalization (which often
8190   // creates this pattern) and before operation legalization after which
8191   // we need to be more careful about the vector instructions that we generate.
8192   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8193       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8194
8195     EVT VecTy = N0.getOperand(0).getValueType();
8196     EVT ExTy = N0.getValueType();
8197     EVT TrTy = N->getValueType(0);
8198
8199     unsigned NumElem = VecTy.getVectorNumElements();
8200     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8201
8202     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8203     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8204
8205     SDValue EltNo = N0->getOperand(1);
8206     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8207       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8208       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8209       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8210
8211       SDLoc DL(N);
8212       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8213                          DAG.getBitcast(NVT, N0.getOperand(0)),
8214                          DAG.getConstant(Index, DL, IndexTy));
8215     }
8216   }
8217
8218   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8219   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8220     EVT SrcVT = N0.getValueType();
8221     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8222         TLI.isTruncateFree(SrcVT, VT)) {
8223       SDLoc SL(N0);
8224       SDValue Cond = N0.getOperand(0);
8225       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8226       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8227       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8228     }
8229   }
8230
8231   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8232   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8233       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8234       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8235     SDValue Amt = N0.getOperand(1);
8236     KnownBits Known;
8237     DAG.computeKnownBits(Amt, Known);
8238     unsigned Size = VT.getScalarSizeInBits();
8239     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
8240       SDLoc SL(N);
8241       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8242
8243       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8244       if (AmtVT != Amt.getValueType()) {
8245         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
8246         AddToWorklist(Amt.getNode());
8247       }
8248       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
8249     }
8250   }
8251
8252   // Fold a series of buildvector, bitcast, and truncate if possible.
8253   // For example fold
8254   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8255   //   (2xi32 (buildvector x, y)).
8256   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8257       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8258       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8259       N0.getOperand(0).hasOneUse()) {
8260
8261     SDValue BuildVect = N0.getOperand(0);
8262     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8263     EVT TruncVecEltTy = VT.getVectorElementType();
8264
8265     // Check that the element types match.
8266     if (BuildVectEltTy == TruncVecEltTy) {
8267       // Now we only need to compute the offset of the truncated elements.
8268       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8269       unsigned TruncVecNumElts = VT.getVectorNumElements();
8270       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8271
8272       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8273              "Invalid number of elements");
8274
8275       SmallVector<SDValue, 8> Opnds;
8276       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8277         Opnds.push_back(BuildVect.getOperand(i));
8278
8279       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8280     }
8281   }
8282
8283   // See if we can simplify the input to this truncate through knowledge that
8284   // only the low bits are being used.
8285   // For example "trunc (or (shl x, 8), y)" // -> trunc y
8286   // Currently we only perform this optimization on scalars because vectors
8287   // may have different active low bits.
8288   if (!VT.isVector()) {
8289     if (SDValue Shorter =
8290             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
8291                                                      VT.getSizeInBits())))
8292       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8293   }
8294
8295   // fold (truncate (load x)) -> (smaller load x)
8296   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8297   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8298     if (SDValue Reduced = ReduceLoadWidth(N))
8299       return Reduced;
8300
8301     // Handle the case where the load remains an extending load even
8302     // after truncation.
8303     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8304       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8305       if (!LN0->isVolatile() &&
8306           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8307         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8308                                          VT, LN0->getChain(), LN0->getBasePtr(),
8309                                          LN0->getMemoryVT(),
8310                                          LN0->getMemOperand());
8311         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8312         return NewLoad;
8313       }
8314     }
8315   }
8316
8317   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8318   // where ... are all 'undef'.
8319   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8320     SmallVector<EVT, 8> VTs;
8321     SDValue V;
8322     unsigned Idx = 0;
8323     unsigned NumDefs = 0;
8324
8325     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8326       SDValue X = N0.getOperand(i);
8327       if (!X.isUndef()) {
8328         V = X;
8329         Idx = i;
8330         NumDefs++;
8331       }
8332       // Stop if more than one members are non-undef.
8333       if (NumDefs > 1)
8334         break;
8335       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8336                                      VT.getVectorElementType(),
8337                                      X.getValueType().getVectorNumElements()));
8338     }
8339
8340     if (NumDefs == 0)
8341       return DAG.getUNDEF(VT);
8342
8343     if (NumDefs == 1) {
8344       assert(V.getNode() && "The single defined operand is empty!");
8345       SmallVector<SDValue, 8> Opnds;
8346       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8347         if (i != Idx) {
8348           Opnds.push_back(DAG.getUNDEF(VTs[i]));
8349           continue;
8350         }
8351         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8352         AddToWorklist(NV.getNode());
8353         Opnds.push_back(NV);
8354       }
8355       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8356     }
8357   }
8358
8359   // Fold truncate of a bitcast of a vector to an extract of the low vector
8360   // element.
8361   //
8362   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
8363   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8364     SDValue VecSrc = N0.getOperand(0);
8365     EVT SrcVT = VecSrc.getValueType();
8366     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8367         (!LegalOperations ||
8368          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8369       SDLoc SL(N);
8370
8371       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8372       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8373                          VecSrc, DAG.getConstant(0, SL, IdxVT));
8374     }
8375   }
8376
8377   // Simplify the operands using demanded-bits information.
8378   if (!VT.isVector() &&
8379       SimplifyDemandedBits(SDValue(N, 0)))
8380     return SDValue(N, 0);
8381
8382   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8383   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
8384   // When the adde's carry is not used.
8385   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
8386       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
8387       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
8388     SDLoc SL(N);
8389     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8390     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8391     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
8392     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
8393   }
8394
8395   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8396     return NewVSel;
8397
8398   return SDValue();
8399 }
8400
8401 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8402   SDValue Elt = N->getOperand(i);
8403   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8404     return Elt.getNode();
8405   return Elt.getOperand(Elt.getResNo()).getNode();
8406 }
8407
8408 /// build_pair (load, load) -> load
8409 /// if load locations are consecutive.
8410 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8411   assert(N->getOpcode() == ISD::BUILD_PAIR);
8412
8413   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8414   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8415   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8416       LD1->getAddressSpace() != LD2->getAddressSpace())
8417     return SDValue();
8418   EVT LD1VT = LD1->getValueType(0);
8419   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8420   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8421       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8422     unsigned Align = LD1->getAlignment();
8423     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8424         VT.getTypeForEVT(*DAG.getContext()));
8425
8426     if (NewAlign <= Align &&
8427         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8428       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8429                          LD1->getPointerInfo(), Align);
8430   }
8431
8432   return SDValue();
8433 }
8434
8435 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8436   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8437   // and Lo parts; on big-endian machines it doesn't.
8438   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8439 }
8440
8441 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8442                                     const TargetLowering &TLI) {
8443   // If this is not a bitcast to an FP type or if the target doesn't have
8444   // IEEE754-compliant FP logic, we're done.
8445   EVT VT = N->getValueType(0);
8446   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8447     return SDValue();
8448
8449   // TODO: Use splat values for the constant-checking below and remove this
8450   // restriction.
8451   SDValue N0 = N->getOperand(0);
8452   EVT SourceVT = N0.getValueType();
8453   if (SourceVT.isVector())
8454     return SDValue();
8455
8456   unsigned FPOpcode;
8457   APInt SignMask;
8458   switch (N0.getOpcode()) {
8459   case ISD::AND:
8460     FPOpcode = ISD::FABS;
8461     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8462     break;
8463   case ISD::XOR:
8464     FPOpcode = ISD::FNEG;
8465     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8466     break;
8467   // TODO: ISD::OR --> ISD::FNABS?
8468   default:
8469     return SDValue();
8470   }
8471
8472   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8473   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8474   SDValue LogicOp0 = N0.getOperand(0);
8475   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8476   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8477       LogicOp0.getOpcode() == ISD::BITCAST &&
8478       LogicOp0->getOperand(0).getValueType() == VT)
8479     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8480
8481   return SDValue();
8482 }
8483
/// Combine a BITCAST node.
///
/// The folds are tried in the order written; the first one that applies
/// returns. In order: undef propagation, constant folding of an all-constant
/// BUILD_VECTOR operand (only before type legalization), folding of scalar
/// constants, collapsing nested bitcasts, retyping a single-use non-volatile
/// normal load, recognizing sign-bit logic as FABS/FNEG (foldBitcastedFPLogic),
/// expanding FNEG/FABS and FCOPYSIGN-of-constant into integer logic, merging a
/// BUILD_PAIR of consecutive loads, and looking through bitcasts around a
/// VECTOR_SHUFFLE. Returns the replacement value, or an empty SDValue when
/// nothing applied.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> fp conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getBitcast(VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only retype the load if the target reports that an access of type VT at
    // the original alignment is both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect users of the old load's chain result to the new load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      // Sign mask of one 64-bit half; the same flip bit is applied to both
      // halves through the BUILD_PAIR below.
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // For fabs, flip only if the Hi half's sign bit is currently set.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        // Note: this X shadows the outer X computed above; the ppc_fp128 path
        // bitcasts the sign operand directly to VT.
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Keep only the sign bit of X ...
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      // ... and everything except the sign bit of the constant.
      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    // Returning here skips nothing: this is the last fold in the function.
    if (!(SV0 && SV1))
      return SDValue();

    // Each source mask entry expands to MaskScale consecutive entries in the
    // finer-grained result type; undef (negative) entries stay undef.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the scaled mask is not legal as is, retry with the operands commuted.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
8731
8732 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8733   EVT VT = N->getValueType(0);
8734   return CombineConsecutiveLoads(N, VT);
8735 }
8736
8737 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8738 /// operands. DstEltVT indicates the destination element value type.
8739 SDValue DAGCombiner::
8740 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8741   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8742
8743   // If this is already the right type, we're done.
8744   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8745
8746   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8747   unsigned DstBitSize = DstEltVT.getSizeInBits();
8748
8749   // If this is a conversion of N elements of one type to N elements of another
8750   // type, convert each element.  This handles FP<->INT cases.
8751   if (SrcBitSize == DstBitSize) {
8752     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8753                               BV->getValueType(0).getVectorNumElements());
8754
8755     // Due to the FP element handling below calling this routine recursively,
8756     // we can end up with a scalar-to-vector node here.
8757     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8758       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8759                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8760
8761     SmallVector<SDValue, 8> Ops;
8762     for (SDValue Op : BV->op_values()) {
8763       // If the vector element type is not legal, the BUILD_VECTOR operands
8764       // are promoted and implicitly truncated.  Make that explicit here.
8765       if (Op.getValueType() != SrcEltVT)
8766         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8767       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8768       AddToWorklist(Ops.back().getNode());
8769     }
8770     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8771   }
8772
8773   // Otherwise, we're growing or shrinking the elements.  To avoid having to
8774   // handle annoying details of growing/shrinking FP values, we convert them to
8775   // int first.
8776   if (SrcEltVT.isFloatingPoint()) {
8777     // Convert the input float vector to a int vector where the elements are the
8778     // same sizes.
8779     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8780     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8781     SrcEltVT = IntVT;
8782   }
8783
8784   // Now we know the input is an integer vector.  If the output is a FP type,
8785   // convert to integer first, then to FP of the right size.
8786   if (DstEltVT.isFloatingPoint()) {
8787     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8788     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8789
8790     // Next, convert to FP elements of the same size.
8791     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8792   }
8793
8794   SDLoc DL(BV);
8795
8796   // Okay, we know the src/dst types are both integers of differing types.
8797   // Handling growing first.
8798   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8799   if (SrcBitSize < DstBitSize) {
8800     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
8801
8802     SmallVector<SDValue, 8> Ops;
8803     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
8804          i += NumInputsPerOutput) {
8805       bool isLE = DAG.getDataLayout().isLittleEndian();
8806       APInt NewBits = APInt(DstBitSize, 0);
8807       bool EltIsUndef = true;
8808       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
8809         // Shift the previously computed bits over.
8810         NewBits <<= SrcBitSize;
8811         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
8812         if (Op.isUndef()) continue;
8813         EltIsUndef = false;
8814
8815         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8816                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
8817       }
8818
8819       if (EltIsUndef)
8820         Ops.push_back(DAG.getUNDEF(DstEltVT));
8821       else
8822         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8823     }
8824
8825     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8826     return DAG.getBuildVector(VT, DL, Ops);
8827   }
8828
8829   // Finally, this must be the case where we are shrinking elements: each input
8830   // turns into multiple outputs.
8831   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
8832   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8833                             NumOutputsPerInput*BV->getNumOperands());
8834   SmallVector<SDValue, 8> Ops;
8835
8836   for (const SDValue &Op : BV->op_values()) {
8837     if (Op.isUndef()) {
8838       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
8839       continue;
8840     }
8841
8842     APInt OpVal = cast<ConstantSDNode>(Op)->
8843                   getAPIntValue().zextOrTrunc(SrcBitSize);
8844
8845     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
8846       APInt ThisVal = OpVal.trunc(DstBitSize);
8847       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
8848       OpVal.lshrInPlace(DstBitSize);
8849     }
8850
8851     // For big endian targets, swap the order of the pieces of each element.
8852     if (DAG.getDataLayout().isBigEndian())
8853       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
8854   }
8855
8856   return DAG.getBuildVector(VT, DL, Ops);
8857 }
8858
8859 static bool isContractable(SDNode *N) {
8860   SDNodeFlags F = N->getFlags();
8861   return F.hasAllowContract() || F.hasUnsafeAlgebra();
8862 }
8863
8864 /// Try to perform FMA combining on a given FADD node.
8865 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
8866   SDValue N0 = N->getOperand(0);
8867   SDValue N1 = N->getOperand(1);
8868   EVT VT = N->getValueType(0);
8869   SDLoc SL(N);
8870
8871   const TargetOptions &Options = DAG.getTarget().Options;
8872
8873   // Floating-point multiply-add with intermediate rounding.
8874   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8875
8876   // Floating-point multiply-add without intermediate rounding.
8877   bool HasFMA =
8878       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8879       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8880
8881   // No valid opcode, do not combine.
8882   if (!HasFMAD && !HasFMA)
8883     return SDValue();
8884
8885   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
8886                               Options.UnsafeFPMath || HasFMAD);
8887   // If the addition is not contractable, do not combine.
8888   if (!AllowFusionGlobally && !isContractable(N))
8889     return SDValue();
8890
8891   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8892   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
8893     return SDValue();
8894
8895   // Always prefer FMAD to FMA for precision.
8896   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8897   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8898   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8899
8900   // Is the node an FMUL and contractable either due to global flags or
8901   // SDNodeFlags.
8902   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
8903     if (N.getOpcode() != ISD::FMUL)
8904       return false;
8905     return AllowFusionGlobally || isContractable(N.getNode());
8906   };
8907   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
8908   // prefer to fold the multiply with fewer uses.
8909   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
8910     if (N0.getNode()->use_size() > N1.getNode()->use_size())
8911       std::swap(N0, N1);
8912   }
8913
8914   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
8915   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
8916     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8917                        N0.getOperand(0), N0.getOperand(1), N1);
8918   }
8919
8920   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
8921   // Note: Commutes FADD operands.
8922   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
8923     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8924                        N1.getOperand(0), N1.getOperand(1), N0);
8925   }
8926
8927   // Look through FP_EXTEND nodes to do more combining.
8928   if (LookThroughFPExt) {
8929     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
8930     if (N0.getOpcode() == ISD::FP_EXTEND) {
8931       SDValue N00 = N0.getOperand(0);
8932       if (isContractableFMUL(N00))
8933         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8934                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8935                                        N00.getOperand(0)),
8936                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8937                                        N00.getOperand(1)), N1);
8938     }
8939
8940     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
8941     // Note: Commutes FADD operands.
8942     if (N1.getOpcode() == ISD::FP_EXTEND) {
8943       SDValue N10 = N1.getOperand(0);
8944       if (isContractableFMUL(N10))
8945         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8946                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8947                                        N10.getOperand(0)),
8948                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8949                                        N10.getOperand(1)), N0);
8950     }
8951   }
8952
8953   // More folding opportunities when target permits.
8954   if (Aggressive) {
8955     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
8956     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8957     // are currently only supported on binary nodes.
8958     if (Options.UnsafeFPMath &&
8959         N0.getOpcode() == PreferredFusedOpcode &&
8960         N0.getOperand(2).getOpcode() == ISD::FMUL &&
8961         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
8962       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8963                          N0.getOperand(0), N0.getOperand(1),
8964                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8965                                      N0.getOperand(2).getOperand(0),
8966                                      N0.getOperand(2).getOperand(1),
8967                                      N1));
8968     }
8969
8970     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
8971     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8972     // are currently only supported on binary nodes.
8973     if (Options.UnsafeFPMath &&
8974         N1->getOpcode() == PreferredFusedOpcode &&
8975         N1.getOperand(2).getOpcode() == ISD::FMUL &&
8976         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
8977       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8978                          N1.getOperand(0), N1.getOperand(1),
8979                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8980                                      N1.getOperand(2).getOperand(0),
8981                                      N1.getOperand(2).getOperand(1),
8982                                      N0));
8983     }
8984
8985     if (LookThroughFPExt) {
8986       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
8987       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
8988       auto FoldFAddFMAFPExtFMul = [&] (
8989           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8990         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
8991                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8992                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8993                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8994                                        Z));
8995       };
8996       if (N0.getOpcode() == PreferredFusedOpcode) {
8997         SDValue N02 = N0.getOperand(2);
8998         if (N02.getOpcode() == ISD::FP_EXTEND) {
8999           SDValue N020 = N02.getOperand(0);
9000           if (isContractableFMUL(N020))
9001             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
9002                                         N020.getOperand(0), N020.getOperand(1),
9003                                         N1);
9004         }
9005       }
9006
9007       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
9008       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
9009       // FIXME: This turns two single-precision and one double-precision
9010       // operation into two double-precision operations, which might not be
9011       // interesting for all targets, especially GPUs.
9012       auto FoldFAddFPExtFMAFMul = [&] (
9013           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9014         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9015                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
9016                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
9017                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9018                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9019                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9020                                        Z));
9021       };
9022       if (N0.getOpcode() == ISD::FP_EXTEND) {
9023         SDValue N00 = N0.getOperand(0);
9024         if (N00.getOpcode() == PreferredFusedOpcode) {
9025           SDValue N002 = N00.getOperand(2);
9026           if (isContractableFMUL(N002))
9027             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
9028                                         N002.getOperand(0), N002.getOperand(1),
9029                                         N1);
9030         }
9031       }
9032
9033       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
9034       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
9035       if (N1.getOpcode() == PreferredFusedOpcode) {
9036         SDValue N12 = N1.getOperand(2);
9037         if (N12.getOpcode() == ISD::FP_EXTEND) {
9038           SDValue N120 = N12.getOperand(0);
9039           if (isContractableFMUL(N120))
9040             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
9041                                         N120.getOperand(0), N120.getOperand(1),
9042                                         N0);
9043         }
9044       }
9045
9046       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
9047       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
9048       // FIXME: This turns two single-precision and one double-precision
9049       // operation into two double-precision operations, which might not be
9050       // interesting for all targets, especially GPUs.
9051       if (N1.getOpcode() == ISD::FP_EXTEND) {
9052         SDValue N10 = N1.getOperand(0);
9053         if (N10.getOpcode() == PreferredFusedOpcode) {
9054           SDValue N102 = N10.getOperand(2);
9055           if (isContractableFMUL(N102))
9056             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
9057                                         N102.getOperand(0), N102.getOperand(1),
9058                                         N0);
9059         }
9060       }
9061     }
9062   }
9063
9064   return SDValue();
9065 }
9066
9067 /// Try to perform FMA combining on a given FSUB node.
9068 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9069   SDValue N0 = N->getOperand(0);
9070   SDValue N1 = N->getOperand(1);
9071   EVT VT = N->getValueType(0);
9072   SDLoc SL(N);
9073
9074   const TargetOptions &Options = DAG.getTarget().Options;
9075   // Floating-point multiply-add with intermediate rounding.
9076   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9077
9078   // Floating-point multiply-add without intermediate rounding.
9079   bool HasFMA =
9080       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9081       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9082
9083   // No valid opcode, do not combine.
9084   if (!HasFMAD && !HasFMA)
9085     return SDValue();
9086
9087   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9088                               Options.UnsafeFPMath || HasFMAD);
9089   // If the subtraction is not contractable, do not combine.
9090   if (!AllowFusionGlobally && !isContractable(N))
9091     return SDValue();
9092
9093   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9094   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9095     return SDValue();
9096
9097   // Always prefer FMAD to FMA for precision.
9098   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9099   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9100   bool LookThroughFPExt = TLI.isFPExtFree(VT);
9101
9102   // Is the node an FMUL and contractable either due to global flags or
9103   // SDNodeFlags.
9104   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9105     if (N.getOpcode() != ISD::FMUL)
9106       return false;
9107     return AllowFusionGlobally || isContractable(N.getNode());
9108   };
9109
9110   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9111   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9112     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9113                        N0.getOperand(0), N0.getOperand(1),
9114                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9115   }
9116
9117   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9118   // Note: Commutes FSUB operands.
9119   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9120     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9121                        DAG.getNode(ISD::FNEG, SL, VT,
9122                                    N1.getOperand(0)),
9123                        N1.getOperand(1), N0);
9124
9125   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9126   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9127       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9128     SDValue N00 = N0.getOperand(0).getOperand(0);
9129     SDValue N01 = N0.getOperand(0).getOperand(1);
9130     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9131                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9132                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9133   }
9134
9135   // Look through FP_EXTEND nodes to do more combining.
9136   if (LookThroughFPExt) {
9137     // fold (fsub (fpext (fmul x, y)), z)
9138     //   -> (fma (fpext x), (fpext y), (fneg z))
9139     if (N0.getOpcode() == ISD::FP_EXTEND) {
9140       SDValue N00 = N0.getOperand(0);
9141       if (isContractableFMUL(N00))
9142         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9143                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9144                                        N00.getOperand(0)),
9145                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9146                                        N00.getOperand(1)),
9147                            DAG.getNode(ISD::FNEG, SL, VT, N1));
9148     }
9149
9150     // fold (fsub x, (fpext (fmul y, z)))
9151     //   -> (fma (fneg (fpext y)), (fpext z), x)
9152     // Note: Commutes FSUB operands.
9153     if (N1.getOpcode() == ISD::FP_EXTEND) {
9154       SDValue N10 = N1.getOperand(0);
9155       if (isContractableFMUL(N10))
9156         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9157                            DAG.getNode(ISD::FNEG, SL, VT,
9158                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9159                                                    N10.getOperand(0))),
9160                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9161                                        N10.getOperand(1)),
9162                            N0);
9163     }
9164
9165     // fold (fsub (fpext (fneg (fmul, x, y))), z)
9166     //   -> (fneg (fma (fpext x), (fpext y), z))
9167     // Note: This could be removed with appropriate canonicalization of the
9168     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9169     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9170     // from implementing the canonicalization in visitFSUB.
9171     if (N0.getOpcode() == ISD::FP_EXTEND) {
9172       SDValue N00 = N0.getOperand(0);
9173       if (N00.getOpcode() == ISD::FNEG) {
9174         SDValue N000 = N00.getOperand(0);
9175         if (isContractableFMUL(N000)) {
9176           return DAG.getNode(ISD::FNEG, SL, VT,
9177                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9178                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9179                                                      N000.getOperand(0)),
9180                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9181                                                      N000.getOperand(1)),
9182                                          N1));
9183         }
9184       }
9185     }
9186
9187     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9188     //   -> (fneg (fma (fpext x)), (fpext y), z)
9189     // Note: This could be removed with appropriate canonicalization of the
9190     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9191     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9192     // from implementing the canonicalization in visitFSUB.
9193     if (N0.getOpcode() == ISD::FNEG) {
9194       SDValue N00 = N0.getOperand(0);
9195       if (N00.getOpcode() == ISD::FP_EXTEND) {
9196         SDValue N000 = N00.getOperand(0);
9197         if (isContractableFMUL(N000)) {
9198           return DAG.getNode(ISD::FNEG, SL, VT,
9199                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9200                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9201                                                      N000.getOperand(0)),
9202                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9203                                                      N000.getOperand(1)),
9204                                          N1));
9205         }
9206       }
9207     }
9208
9209   }
9210
9211   // More folding opportunities when target permits.
9212   if (Aggressive) {
9213     // fold (fsub (fma x, y, (fmul u, v)), z)
9214     //   -> (fma x, y (fma u, v, (fneg z)))
9215     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9216     // are currently only supported on binary nodes.
9217     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9218         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9219         N0.getOperand(2)->hasOneUse()) {
9220       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9221                          N0.getOperand(0), N0.getOperand(1),
9222                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9223                                      N0.getOperand(2).getOperand(0),
9224                                      N0.getOperand(2).getOperand(1),
9225                                      DAG.getNode(ISD::FNEG, SL, VT,
9226                                                  N1)));
9227     }
9228
9229     // fold (fsub x, (fma y, z, (fmul u, v)))
9230     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9231     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9232     // are currently only supported on binary nodes.
9233     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9234         isContractableFMUL(N1.getOperand(2))) {
9235       SDValue N20 = N1.getOperand(2).getOperand(0);
9236       SDValue N21 = N1.getOperand(2).getOperand(1);
9237       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9238                          DAG.getNode(ISD::FNEG, SL, VT,
9239                                      N1.getOperand(0)),
9240                          N1.getOperand(1),
9241                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9242                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9243
9244                                      N21, N0));
9245     }
9246
9247     if (LookThroughFPExt) {
9248       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9249       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9250       if (N0.getOpcode() == PreferredFusedOpcode) {
9251         SDValue N02 = N0.getOperand(2);
9252         if (N02.getOpcode() == ISD::FP_EXTEND) {
9253           SDValue N020 = N02.getOperand(0);
9254           if (isContractableFMUL(N020))
9255             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9256                                N0.getOperand(0), N0.getOperand(1),
9257                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9258                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9259                                                        N020.getOperand(0)),
9260                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9261                                                        N020.getOperand(1)),
9262                                            DAG.getNode(ISD::FNEG, SL, VT,
9263                                                        N1)));
9264         }
9265       }
9266
9267       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9268       //   -> (fma (fpext x), (fpext y),
9269       //           (fma (fpext u), (fpext v), (fneg z)))
9270       // FIXME: This turns two single-precision and one double-precision
9271       // operation into two double-precision operations, which might not be
9272       // interesting for all targets, especially GPUs.
9273       if (N0.getOpcode() == ISD::FP_EXTEND) {
9274         SDValue N00 = N0.getOperand(0);
9275         if (N00.getOpcode() == PreferredFusedOpcode) {
9276           SDValue N002 = N00.getOperand(2);
9277           if (isContractableFMUL(N002))
9278             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9279                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9280                                            N00.getOperand(0)),
9281                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9282                                            N00.getOperand(1)),
9283                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9284                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9285                                                        N002.getOperand(0)),
9286                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9287                                                        N002.getOperand(1)),
9288                                            DAG.getNode(ISD::FNEG, SL, VT,
9289                                                        N1)));
9290         }
9291       }
9292
9293       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9294       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9295       if (N1.getOpcode() == PreferredFusedOpcode &&
9296         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9297         SDValue N120 = N1.getOperand(2).getOperand(0);
9298         if (isContractableFMUL(N120)) {
9299           SDValue N1200 = N120.getOperand(0);
9300           SDValue N1201 = N120.getOperand(1);
9301           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9302                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9303                              N1.getOperand(1),
9304                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9305                                          DAG.getNode(ISD::FNEG, SL, VT,
9306                                              DAG.getNode(ISD::FP_EXTEND, SL,
9307                                                          VT, N1200)),
9308                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9309                                                      N1201),
9310                                          N0));
9311         }
9312       }
9313
9314       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9315       //   -> (fma (fneg (fpext y)), (fpext z),
9316       //           (fma (fneg (fpext u)), (fpext v), x))
9317       // FIXME: This turns two single-precision and one double-precision
9318       // operation into two double-precision operations, which might not be
9319       // interesting for all targets, especially GPUs.
9320       if (N1.getOpcode() == ISD::FP_EXTEND &&
9321         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9322         SDValue N100 = N1.getOperand(0).getOperand(0);
9323         SDValue N101 = N1.getOperand(0).getOperand(1);
9324         SDValue N102 = N1.getOperand(0).getOperand(2);
9325         if (isContractableFMUL(N102)) {
9326           SDValue N1020 = N102.getOperand(0);
9327           SDValue N1021 = N102.getOperand(1);
9328           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9329                              DAG.getNode(ISD::FNEG, SL, VT,
9330                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9331                                                      N100)),
9332                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9333                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9334                                          DAG.getNode(ISD::FNEG, SL, VT,
9335                                              DAG.getNode(ISD::FP_EXTEND, SL,
9336                                                          VT, N1020)),
9337                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9338                                                      N1021),
9339                                          N0));
9340         }
9341       }
9342     }
9343   }
9344
9345   return SDValue();
9346 }
9347
9348 /// Try to perform FMA combining on a given FMUL node based on the distributive
9349 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9350 /// subtraction instead of addition).
9351 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9352   SDValue N0 = N->getOperand(0);
9353   SDValue N1 = N->getOperand(1);
9354   EVT VT = N->getValueType(0);
9355   SDLoc SL(N);
9356
9357   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9358
9359   const TargetOptions &Options = DAG.getTarget().Options;
9360
9361   // The transforms below are incorrect when x == 0 and y == inf, because the
9362   // intermediate multiplication produces a nan.
9363   if (!Options.NoInfsFPMath)
9364     return SDValue();
9365
9366   // Floating-point multiply-add without intermediate rounding.
9367   bool HasFMA =
9368       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9369       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9370       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9371
9372   // Floating-point multiply-add with intermediate rounding. This can result
9373   // in a less precise result due to the changed rounding order.
9374   bool HasFMAD = Options.UnsafeFPMath &&
9375                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9376
9377   // No valid opcode, do not combine.
9378   if (!HasFMAD && !HasFMA)
9379     return SDValue();
9380
9381   // Always prefer FMAD to FMA for precision.
9382   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9383   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9384
9385   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9386   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9387   auto FuseFADD = [&](SDValue X, SDValue Y) {
9388     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9389       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9390       if (XC1 && XC1->isExactlyValue(+1.0))
9391         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9392       if (XC1 && XC1->isExactlyValue(-1.0))
9393         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9394                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9395     }
9396     return SDValue();
9397   };
9398
9399   if (SDValue FMA = FuseFADD(N0, N1))
9400     return FMA;
9401   if (SDValue FMA = FuseFADD(N1, N0))
9402     return FMA;
9403
9404   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9405   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9406   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9407   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9408   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9409     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9410       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9411       if (XC0 && XC0->isExactlyValue(+1.0))
9412         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9413                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9414                            Y);
9415       if (XC0 && XC0->isExactlyValue(-1.0))
9416         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9417                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9418                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9419
9420       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9421       if (XC1 && XC1->isExactlyValue(+1.0))
9422         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9423                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9424       if (XC1 && XC1->isExactlyValue(-1.0))
9425         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9426     }
9427     return SDValue();
9428   };
9429
9430   if (SDValue FMA = FuseFSUB(N0, N1))
9431     return FMA;
9432   if (SDValue FMA = FuseFSUB(N1, N0))
9433     return FMA;
9434
9435   return SDValue();
9436 }
9437
9438 static bool isFMulNegTwo(SDValue &N) {
9439   if (N.getOpcode() != ISD::FMUL)
9440     return false;
9441   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9442     return CFP->isExactlyValue(-2.0);
9443   return false;
9444 }
9445
9446 SDValue DAGCombiner::visitFADD(SDNode *N) {
9447   SDValue N0 = N->getOperand(0);
9448   SDValue N1 = N->getOperand(1);
9449   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9450   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9451   EVT VT = N->getValueType(0);
9452   SDLoc DL(N);
9453   const TargetOptions &Options = DAG.getTarget().Options;
9454   const SDNodeFlags Flags = N->getFlags();
9455
9456   // fold vector ops
9457   if (VT.isVector())
9458     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9459       return FoldedVOp;
9460
9461   // fold (fadd c1, c2) -> c1 + c2
9462   if (N0CFP && N1CFP)
9463     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9464
9465   // canonicalize constant to RHS
9466   if (N0CFP && !N1CFP)
9467     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9468
9469   if (SDValue NewSel = foldBinOpIntoSelect(N))
9470     return NewSel;
9471
9472   // fold (fadd A, (fneg B)) -> (fsub A, B)
9473   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9474       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9475     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9476                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9477
9478   // fold (fadd (fneg A), B) -> (fsub B, A)
9479   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9480       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9481     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9482                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9483
9484   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9485   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9486   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9487       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9488     bool N1IsFMul = isFMulNegTwo(N1);
9489     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9490     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9491     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9492   }
9493
9494   // FIXME: Auto-upgrade the target/function-level option.
9495   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9496     // fold (fadd A, 0) -> A
9497     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9498       if (N1C->isZero())
9499         return N0;
9500   }
9501
9502   // If 'unsafe math' is enabled, fold lots of things.
9503   if (Options.UnsafeFPMath) {
9504     // No FP constant should be created after legalization as Instruction
9505     // Selection pass has a hard time dealing with FP constants.
9506     bool AllowNewConst = (Level < AfterLegalizeDAG);
9507
9508     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9509     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9510         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9511       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9512                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9513                                      Flags),
9514                          Flags);
9515
9516     // If allowed, fold (fadd (fneg x), x) -> 0.0
9517     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9518       return DAG.getConstantFP(0.0, DL, VT);
9519
9520     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9521     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9522       return DAG.getConstantFP(0.0, DL, VT);
9523
9524     // We can fold chains of FADD's of the same value into multiplications.
9525     // This transform is not safe in general because we are reducing the number
9526     // of rounding steps.
9527     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9528       if (N0.getOpcode() == ISD::FMUL) {
9529         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9530         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9531
9532         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9533         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9534           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9535                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9536           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9537         }
9538
9539         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9540         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9541             N1.getOperand(0) == N1.getOperand(1) &&
9542             N0.getOperand(0) == N1.getOperand(0)) {
9543           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9544                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9545           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9546         }
9547       }
9548
9549       if (N1.getOpcode() == ISD::FMUL) {
9550         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9551         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9552
9553         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9554         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9555           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9556                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9557           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9558         }
9559
9560         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9561         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9562             N0.getOperand(0) == N0.getOperand(1) &&
9563             N1.getOperand(0) == N0.getOperand(0)) {
9564           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9565                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9566           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9567         }
9568       }
9569
9570       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9571         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9572         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9573         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9574             (N0.getOperand(0) == N1)) {
9575           return DAG.getNode(ISD::FMUL, DL, VT,
9576                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9577         }
9578       }
9579
9580       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9581         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9582         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9583         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9584             N1.getOperand(0) == N0) {
9585           return DAG.getNode(ISD::FMUL, DL, VT,
9586                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9587         }
9588       }
9589
9590       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9591       if (AllowNewConst &&
9592           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9593           N0.getOperand(0) == N0.getOperand(1) &&
9594           N1.getOperand(0) == N1.getOperand(1) &&
9595           N0.getOperand(0) == N1.getOperand(0)) {
9596         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9597                            DAG.getConstantFP(4.0, DL, VT), Flags);
9598       }
9599     }
9600   } // enable-unsafe-fp-math
9601
9602   // FADD -> FMA combines:
9603   if (SDValue Fused = visitFADDForFMACombine(N)) {
9604     AddToWorklist(Fused.getNode());
9605     return Fused;
9606   }
9607   return SDValue();
9608 }
9609
9610 SDValue DAGCombiner::visitFSUB(SDNode *N) {
9611   SDValue N0 = N->getOperand(0);
9612   SDValue N1 = N->getOperand(1);
9613   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9614   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9615   EVT VT = N->getValueType(0);
9616   SDLoc DL(N);
9617   const TargetOptions &Options = DAG.getTarget().Options;
9618   const SDNodeFlags Flags = N->getFlags();
9619
9620   // fold vector ops
9621   if (VT.isVector())
9622     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9623       return FoldedVOp;
9624
9625   // fold (fsub c1, c2) -> c1-c2
9626   if (N0CFP && N1CFP)
9627     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
9628
9629   if (SDValue NewSel = foldBinOpIntoSelect(N))
9630     return NewSel;
9631
9632   // fold (fsub A, (fneg B)) -> (fadd A, B)
9633   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9634     return DAG.getNode(ISD::FADD, DL, VT, N0,
9635                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9636
9637   // FIXME: Auto-upgrade the target/function-level option.
9638   if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
9639     // (fsub 0, B) -> -B
9640     if (N0CFP && N0CFP->isZero()) {
9641       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9642         return GetNegatedExpression(N1, DAG, LegalOperations);
9643       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9644         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
9645     }
9646   }
9647
9648   // If 'unsafe math' is enabled, fold lots of things.
9649   if (Options.UnsafeFPMath) {
9650     // (fsub A, 0) -> A
9651     if (N1CFP && N1CFP->isZero())
9652       return N0;
9653
9654     // (fsub x, x) -> 0.0
9655     if (N0 == N1)
9656       return DAG.getConstantFP(0.0f, DL, VT);
9657
9658     // (fsub x, (fadd x, y)) -> (fneg y)
9659     // (fsub x, (fadd y, x)) -> (fneg y)
9660     if (N1.getOpcode() == ISD::FADD) {
9661       SDValue N10 = N1->getOperand(0);
9662       SDValue N11 = N1->getOperand(1);
9663
9664       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
9665         return GetNegatedExpression(N11, DAG, LegalOperations);
9666
9667       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
9668         return GetNegatedExpression(N10, DAG, LegalOperations);
9669     }
9670   }
9671
9672   // FSUB -> FMA combines:
9673   if (SDValue Fused = visitFSUBForFMACombine(N)) {
9674     AddToWorklist(Fused.getNode());
9675     return Fused;
9676   }
9677
9678   return SDValue();
9679 }
9680
/// Combine an ISD::FMUL node.
///
/// The folds below are attempted in source order and the first one that
/// applies is returned:
///   - vector simplification via SimplifyVBinOp,
///   - constant folding, then canonicalizing a constant operand to the RHS,
///   - (fmul A, 1.0) -> A,
///   - folding into a select operand via foldBinOpIntoSelect,
///   - reassociation folds guarded by Options.UnsafeFPMath,
///   - (fmul X, 2.0) -> (fadd X, X) and (fmul X, -1.0) -> (fneg X),
///   - (fmul (fneg X), (fneg Y)) -> (fmul X, Y),
///   - a sign-select multiply rewritten to FABS / FNEG(FABS),
///   - FMA formation via visitFMULForFMADistributiveCombine.
///
/// \param N the FMUL node being visited.
/// \returns the replacement value, or an empty SDValue if no fold applied.
9681 SDValue DAGCombiner::visitFMUL(SDNode *N) {
9682   SDValue N0 = N->getOperand(0);
9683   SDValue N1 = N->getOperand(1);
9684   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9685   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9686   EVT VT = N->getValueType(0);
9687   SDLoc DL(N);
9688   const TargetOptions &Options = DAG.getTarget().Options;
9689   const SDNodeFlags Flags = N->getFlags();
9690
9691   // fold vector ops
9692   if (VT.isVector()) {
9693     // This just handles C1 * C2 for vectors. Other vector folds are below.
9694     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9695       return FoldedVOp;
9696   }
9697
9698   // fold (fmul c1, c2) -> c1*c2
9699   if (N0CFP && N1CFP)
9700     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
9701
9702   // canonicalize constant to RHS
9703   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9704      !isConstantFPBuildVectorOrConstantFP(N1))
9705     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
9706
9707   // fold (fmul A, 1.0) -> A
9708   if (N1CFP && N1CFP->isExactlyValue(1.0))
9709     return N0;
9710
9711   if (SDValue NewSel = foldBinOpIntoSelect(N))
9712     return NewSel;
9713
9714   if (Options.UnsafeFPMath) {
9715     // fold (fmul A, 0) -> 0
9716     if (N1CFP && N1CFP->isZero())
9717       return N1;
9718
9719     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
9720     if (N0.getOpcode() == ISD::FMUL) {
9721       // Fold scalars or any vector constants (not just splats).
9722       // This fold is done in general by InstCombine, but extra fmul insts
9723       // may have been generated during lowering.
9724       SDValue N00 = N0.getOperand(0);
9725       SDValue N01 = N0.getOperand(1);
9726       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
9727       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
9728       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
9729
9730       // Check 1: Make sure that the first operand of the inner multiply is NOT
9731       // a constant. Otherwise, we may induce infinite looping.
9732       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
9733         // Check 2: Make sure that the second operand of the inner multiply and
9734         // the second operand of the outer multiply are constants.
9735         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
9736             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
9737           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
9738           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
9739         }
9740       }
9741     }
9742
9743     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
9744     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
9745     // during an early run of DAGCombiner can prevent folding with fmuls
9746     // inserted during lowering.
       // NOTE(review): the condition below does not test that N1 is a constant,
       // although the pattern above names it 'c' - confirm this is intended.
9747     if (N0.getOpcode() == ISD::FADD &&
9748         (N0.getOperand(0) == N0.getOperand(1)) &&
9749         N0.hasOneUse()) {
9750       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
9751       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
9752       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
9753     }
9754   }
9755
9756   // fold (fmul X, 2.0) -> (fadd X, X)
9757   if (N1CFP && N1CFP->isExactlyValue(+2.0))
9758     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
9759
9760   // fold (fmul X, -1.0) -> (fneg X)
9761   if (N1CFP && N1CFP->isExactlyValue(-1.0))
9762     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9763       return DAG.getNode(ISD::FNEG, DL, VT, N0);
9764
9765   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
9766   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9767     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9768       // Both can be negated for free, check to see if at least one is cheaper
9769       // negated.
9770       if (LHSNeg == 2 || RHSNeg == 2)
9771         return DAG.getNode(ISD::FMUL, DL, VT,
9772                            GetNegatedExpression(N0, DAG, LegalOperations),
9773                            GetNegatedExpression(N1, DAG, LegalOperations),
9774                            Flags);
9775     }
9776   }
9777
9778   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
9779   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
       // Only attempted when this node carries both the no-NaNs and the
       // no-signed-zeros flags and FABS is legal for VT.
9780   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
9781       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
9782       TLI.isOperationLegal(ISD::FABS, VT)) {
       // Arrange for 'Select' to name the SELECT operand and 'X' the other one.
9783     SDValue Select = N0, X = N1;
9784     if (Select.getOpcode() != ISD::SELECT)
9785       std::swap(Select, X);
9786
9787     SDValue Cond = Select.getOperand(0);
9788     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
9789     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
9790
       // Require constant select arms and a condition of the form
       // (setcc X, 0.0, cc).
9791     if (TrueOpnd && FalseOpnd &&
9792         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
9793         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
9794         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
9795       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9796       switch (CC) {
9797       default: break;
9798       case ISD::SETOLT:
9799       case ISD::SETULT:
9800       case ISD::SETOLE:
9801       case ISD::SETULE:
9802       case ISD::SETLT:
9803       case ISD::SETLE:
         // A less-than style compare is handled by exchanging the select arms
         // and reusing the greater-than handling below.
9804         std::swap(TrueOpnd, FalseOpnd);
9805         // Fall through
9806       case ISD::SETOGT:
9807       case ISD::SETUGT:
9808       case ISD::SETOGE:
9809       case ISD::SETUGE:
9810       case ISD::SETGT:
9811       case ISD::SETGE:
9812         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
9813             TLI.isOperationLegal(ISD::FNEG, VT))
9814           return DAG.getNode(ISD::FNEG, DL, VT,
9815                    DAG.getNode(ISD::FABS, DL, VT, X));
9816         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
9817           return DAG.getNode(ISD::FABS, DL, VT, X);
9818
9819         break;
9820       }
9821     }
9822   }
9823
9824   // FMUL -> FMA combines:
9825   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
9826     AddToWorklist(Fused.getNode());
9827     return Fused;
9828   }
9829
9830   return SDValue();
9831 }
9832
9833 SDValue DAGCombiner::visitFMA(SDNode *N) {
9834   SDValue N0 = N->getOperand(0);
9835   SDValue N1 = N->getOperand(1);
9836   SDValue N2 = N->getOperand(2);
9837   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9838   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9839   EVT VT = N->getValueType(0);
9840   SDLoc DL(N);
9841   const TargetOptions &Options = DAG.getTarget().Options;
9842
9843   // Constant fold FMA.
9844   if (isa<ConstantFPSDNode>(N0) &&
9845       isa<ConstantFPSDNode>(N1) &&
9846       isa<ConstantFPSDNode>(N2)) {
9847     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
9848   }
9849
9850   if (Options.UnsafeFPMath) {
9851     if (N0CFP && N0CFP->isZero())
9852       return N2;
9853     if (N1CFP && N1CFP->isZero())
9854       return N2;
9855   }
9856   // TODO: The FMA node should have flags that propagate to these nodes.
9857   if (N0CFP && N0CFP->isExactlyValue(1.0))
9858     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
9859   if (N1CFP && N1CFP->isExactlyValue(1.0))
9860     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
9861
9862   // Canonicalize (fma c, x, y) -> (fma x, c, y)
9863   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9864      !isConstantFPBuildVectorOrConstantFP(N1))
9865     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
9866
9867   // TODO: FMA nodes should have flags that propagate to the created nodes.
9868   // For now, create a Flags object for use with all unsafe math transforms.
9869   SDNodeFlags Flags;
9870   Flags.setUnsafeAlgebra(true);
9871
9872   if (Options.UnsafeFPMath) {
9873     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
9874     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
9875         isConstantFPBuildVectorOrConstantFP(N1) &&
9876         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
9877       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9878                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
9879                                      Flags), Flags);
9880     }
9881
9882     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
9883     if (N0.getOpcode() == ISD::FMUL &&
9884         isConstantFPBuildVectorOrConstantFP(N1) &&
9885         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
9886       return DAG.getNode(ISD::FMA, DL, VT,
9887                          N0.getOperand(0),
9888                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
9889                                      Flags),
9890                          N2);
9891     }
9892   }
9893
9894   // (fma x, 1, y) -> (fadd x, y)
9895   // (fma x, -1, y) -> (fadd (fneg x), y)
9896   if (N1CFP) {
9897     if (N1CFP->isExactlyValue(1.0))
9898       // TODO: The FMA node should have flags that propagate to this node.
9899       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
9900
9901     if (N1CFP->isExactlyValue(-1.0) &&
9902         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
9903       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
9904       AddToWorklist(RHSNeg.getNode());
9905       // TODO: The FMA node should have flags that propagate to this node.
9906       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
9907     }
9908   }
9909
9910   if (Options.UnsafeFPMath) {
9911     // (fma x, c, x) -> (fmul x, (c+1))
9912     if (N1CFP && N0 == N2) {
9913       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9914                          DAG.getNode(ISD::FADD, DL, VT, N1,
9915                                      DAG.getConstantFP(1.0, DL, VT), Flags),
9916                          Flags);
9917     }
9918
9919     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
9920     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
9921       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9922                          DAG.getNode(ISD::FADD, DL, VT, N1,
9923                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
9924                          Flags);
9925     }
9926   }
9927
9928   return SDValue();
9929 }
9930
9931 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9932 // reciprocal.
9933 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
9934 // Notice that this is not always beneficial. One reason is different targets
9935 // may have different costs for FDIV and FMUL, so sometimes the cost of two
9936 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
9937 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
//
// N is the FDIV node being visited. The transform is attempted only when
// global unsafe FP math is enabled or N carries the allow-reciprocal flag,
// and only when the target's combineRepeatedFPDivisors() threshold is met.
// Returns SDValue(N, 0) once the eligible FDIV users of the divisor have been
// replaced through CombineTo, or an empty SDValue if the transform is skipped.
9938 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
9939   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
9940   const SDNodeFlags Flags = N->getFlags();
9941   if (!UnsafeMath && !Flags.hasAllowReciprocal())
9942     return SDValue();
9943
9944   // Skip if current node is a reciprocal.
9945   SDValue N0 = N->getOperand(0);
9946   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9947   if (N0CFP && N0CFP->isExactlyValue(1.0))
9948     return SDValue();
9949
9950   // Exit early if the target does not want this transform or if there can't
9951   // possibly be enough uses of the divisor to make the transform worthwhile.
     // A threshold of zero from the target disables the transform.
9952   SDValue N1 = N->getOperand(1);
9953   unsigned MinUses = TLI.combineRepeatedFPDivisors();
9954   if (!MinUses || N1->use_size() < MinUses)
9955     return SDValue();
9956
9957   // Find all FDIV users of the same divisor.
9958   // Use a set because duplicates may be present in the user list.
9959   SetVector<SDNode *> Users;
9960   for (auto *U : N1->uses()) {
       // Only count users that divide *by* N1 (N1 is their second operand).
9961     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
9962       // This division is eligible for optimization only if global unsafe math
9963       // is enabled or if this division allows reciprocal formation.
9964       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
9965         Users.insert(U);
9966     }
9967   }
9968
9969   // Now that we have the actual number of divisor uses, make sure it meets
9970   // the minimum threshold specified by the target.
9971   if (Users.size() < MinUses)
9972     return SDValue();
9973
     // Build the shared reciprocal 1.0 / N1, carrying N's flags.
9974   EVT VT = N->getValueType(0);
9975   SDLoc DL(N);
9976   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
9977   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
9978
9979   // Dividend / Divisor -> Dividend * Reciprocal
9980   for (auto *U : Users) {
9981     SDValue Dividend = U->getOperand(0);
9982     if (Dividend != FPOne) {
9983       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
9984                                     Reciprocal, Flags);
9985       CombineTo(U, NewNode);
9986     } else if (U != Reciprocal.getNode()) {
9987       // In the absence of fast-math-flags, this user node is always the
9988       // same node as Reciprocal, but with FMF they may be different nodes.
9989       CombineTo(U, Reciprocal);
9990     }
9991   }
9992   return SDValue(N, 0);  // N was replaced.
9993 }
9994
/// Combine an ISD::FDIV node.
///
/// The folds below are attempted in source order and the first one that
/// applies is returned:
///   - vector simplification via SimplifyVBinOp,
///   - constant folding when both operands are ConstantFPSDNodes,
///   - folding into a select operand via foldBinOpIntoSelect,
///   - under Options.UnsafeFPMath: division by a constant becomes a multiply
///     by its reciprocal, divisors built from FSQRT go through
///     buildRsqrtEstimate, and any remaining divisor goes through
///     BuildReciprocalEstimate,
///   - (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y),
///   - combineRepeatedFPDivisors.
///
/// \param N the FDIV node being visited.
/// \returns the replacement value, or an empty SDValue if no fold applied.
9995 SDValue DAGCombiner::visitFDIV(SDNode *N) {
9996   SDValue N0 = N->getOperand(0);
9997   SDValue N1 = N->getOperand(1);
9998   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9999   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10000   EVT VT = N->getValueType(0);
10001   SDLoc DL(N);
10002   const TargetOptions &Options = DAG.getTarget().Options;
10003   SDNodeFlags Flags = N->getFlags();
10004
10005   // fold vector ops
10006   if (VT.isVector())
10007     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10008       return FoldedVOp;
10009
10010   // fold (fdiv c1, c2) -> c1/c2
10011   if (N0CFP && N1CFP)
10012     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
10013
10014   if (SDValue NewSel = foldBinOpIntoSelect(N))
10015     return NewSel;
10016
10017   if (Options.UnsafeFPMath) {
10018     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
10019     if (N1CFP) {
10020       // Compute the reciprocal 1.0 / c2.
10021       const APFloat &N1APF = N1CFP->getValueAPF();
10022       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
10023       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
10024       // Only do the transform if the reciprocal is a legal fp immediate that
10025       // isn't too nasty (eg NaN, denormal, ...).
10026       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
10027           (!LegalOperations ||
10028            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
10029            // backend)... we should handle this gracefully after Legalize.
10030            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
10031            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
10032            TLI.isFPImmLegal(Recip, VT)))
10033         return DAG.getNode(ISD::FMUL, DL, VT, N0,
10034                            DAG.getConstantFP(Recip, DL, VT), Flags);
10035     }
10036
10037     // If this FDIV is part of a reciprocal square root, it may be folded
10038     // into a target-specific square root estimate instruction.
       // Each branch below only rewrites the node when buildRsqrtEstimate
       // returns a non-empty value.
10039     if (N1.getOpcode() == ISD::FSQRT) {
10040       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
10041         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10042       }
10043     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
10044                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
         // x / fp_extend(sqrt(z)): re-apply the FP_EXTEND to the estimate.
10045       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10046                                           Flags)) {
10047         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
10048         AddToWorklist(RV.getNode());
10049         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10050       }
10051     } else if (N1.getOpcode() == ISD::FP_ROUND &&
10052                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
         // x / fp_round(sqrt(z)): re-apply the FP_ROUND (with its original
         // second operand) to the estimate.
10053       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10054                                           Flags)) {
10055         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
10056         AddToWorklist(RV.getNode());
10057         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10058       }
10059     } else if (N1.getOpcode() == ISD::FMUL) {
10060       // Look through an FMUL. Even though this won't remove the FDIV directly,
10061       // it's still worthwhile to get rid of the FSQRT if possible.
10062       SDValue SqrtOp;
10063       SDValue OtherOp;
10064       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10065         SqrtOp = N1.getOperand(0);
10066         OtherOp = N1.getOperand(1);
10067       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
10068         SqrtOp = N1.getOperand(1);
10069         OtherOp = N1.getOperand(0);
10070       }
10071       if (SqrtOp.getNode()) {
10072         // We found a FSQRT, so try to make this fold:
10073         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
10074         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
10075           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
10076           AddToWorklist(RV.getNode());
10077           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10078         }
10079       }
10080     }
10081
10082     // Fold into a reciprocal estimate and multiply instead of a real divide.
10083     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
10084       AddToWorklist(RV.getNode());
10085       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10086     }
10087   }
10088
10089   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
10090   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10091     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10092       // Both can be negated for free, check to see if at least one is cheaper
10093       // negated.
10094       if (LHSNeg == 2 || RHSNeg == 2)
10095         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
10096                            GetNegatedExpression(N0, DAG, LegalOperations),
10097                            GetNegatedExpression(N1, DAG, LegalOperations),
10098                            Flags);
10099     }
10100   }
10101
10102   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
10103     return CombineRepeatedDivisors;
10104
10105   return SDValue();
10106 }
10107
10108 SDValue DAGCombiner::visitFREM(SDNode *N) {
10109   SDValue N0 = N->getOperand(0);
10110   SDValue N1 = N->getOperand(1);
10111   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10112   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10113   EVT VT = N->getValueType(0);
10114
10115   // fold (frem c1, c2) -> fmod(c1,c2)
10116   if (N0CFP && N1CFP)
10117     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10118
10119   if (SDValue NewSel = foldBinOpIntoSelect(N))
10120     return NewSel;
10121
10122   return SDValue();
10123 }
10124
10125 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10126   if (!DAG.getTarget().Options.UnsafeFPMath)
10127     return SDValue();
10128
10129   SDValue N0 = N->getOperand(0);
10130   if (TLI.isFsqrtCheap(N0, DAG))
10131     return SDValue();
10132
10133   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10134   // For now, create a Flags object for use with all unsafe math transforms.
10135   SDNodeFlags Flags;
10136   Flags.setUnsafeAlgebra(true);
10137   return buildSqrtEstimate(N0, Flags);
10138 }
10139
10140 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10141 /// copysign(x, fp_round(y)) -> copysign(x, y)
10142 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10143   SDValue N1 = N->getOperand(1);
10144   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10145        N1.getOpcode() == ISD::FP_ROUND)) {
10146     // Do not optimize out type conversion of f128 type yet.
10147     // For some targets like x86_64, configuration is changed to keep one f128
10148     // value in one SSE register, but instruction selection cannot handle
10149     // FCOPYSIGN on SSE registers yet.
10150     EVT N1VT = N1->getValueType(0);
10151     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10152     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10153   }
10154   return false;
10155 }
10156
10157 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10158   SDValue N0 = N->getOperand(0);
10159   SDValue N1 = N->getOperand(1);
10160   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10161   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10162   EVT VT = N->getValueType(0);
10163
10164   if (N0CFP && N1CFP) // Constant fold
10165     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10166
10167   if (N1CFP) {
10168     const APFloat &V = N1CFP->getValueAPF();
10169     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10170     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10171     if (!V.isNegative()) {
10172       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10173         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10174     } else {
10175       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10176         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10177                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10178     }
10179   }
10180
10181   // copysign(fabs(x), y) -> copysign(x, y)
10182   // copysign(fneg(x), y) -> copysign(x, y)
10183   // copysign(copysign(x,z), y) -> copysign(x, y)
10184   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10185       N0.getOpcode() == ISD::FCOPYSIGN)
10186     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10187
10188   // copysign(x, abs(y)) -> abs(x)
10189   if (N1.getOpcode() == ISD::FABS)
10190     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10191
10192   // copysign(x, copysign(y,z)) -> copysign(x, z)
10193   if (N1.getOpcode() == ISD::FCOPYSIGN)
10194     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10195
10196   // copysign(x, fp_extend(y)) -> copysign(x, y)
10197   // copysign(x, fp_round(y)) -> copysign(x, y)
10198   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10199     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10200
10201   return SDValue();
10202 }
10203
10204 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10205   SDValue N0 = N->getOperand(0);
10206   EVT VT = N->getValueType(0);
10207   EVT OpVT = N0.getValueType();
10208
10209   // fold (sint_to_fp c1) -> c1fp
10210   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10211       // ...but only if the target supports immediate floating-point values
10212       (!LegalOperations ||
10213        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10214     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10215
10216   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10217   // but UINT_TO_FP is legal on this target, try to convert.
10218   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10219       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10220     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10221     if (DAG.SignBitIsZero(N0))
10222       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10223   }
10224
10225   // The next optimizations are desirable only if SELECT_CC can be lowered.
10226   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10227     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10228     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10229         !VT.isVector() &&
10230         (!LegalOperations ||
10231          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10232       SDLoc DL(N);
10233       SDValue Ops[] =
10234         { N0.getOperand(0), N0.getOperand(1),
10235           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10236           N0.getOperand(2) };
10237       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10238     }
10239
10240     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10241     //      (select_cc x, y, 1.0, 0.0,, cc)
10242     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10243         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10244         (!LegalOperations ||
10245          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10246       SDLoc DL(N);
10247       SDValue Ops[] =
10248         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10249           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10250           N0.getOperand(0).getOperand(2) };
10251       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10252     }
10253   }
10254
10255   return SDValue();
10256 }
10257
10258 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10259   SDValue N0 = N->getOperand(0);
10260   EVT VT = N->getValueType(0);
10261   EVT OpVT = N0.getValueType();
10262
10263   // fold (uint_to_fp c1) -> c1fp
10264   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10265       // ...but only if the target supports immediate floating-point values
10266       (!LegalOperations ||
10267        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10268     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10269
10270   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10271   // but SINT_TO_FP is legal on this target, try to convert.
10272   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10273       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10274     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10275     if (DAG.SignBitIsZero(N0))
10276       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10277   }
10278
10279   // The next optimizations are desirable only if SELECT_CC can be lowered.
10280   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10281     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10282
10283     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10284         (!LegalOperations ||
10285          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10286       SDLoc DL(N);
10287       SDValue Ops[] =
10288         { N0.getOperand(0), N0.getOperand(1),
10289           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10290           N0.getOperand(2) };
10291       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10292     }
10293   }
10294
10295   return SDValue();
10296 }
10297
10298 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10299 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10300   SDValue N0 = N->getOperand(0);
10301   EVT VT = N->getValueType(0);
10302
10303   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10304     return SDValue();
10305
10306   SDValue Src = N0.getOperand(0);
10307   EVT SrcVT = Src.getValueType();
10308   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10309   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10310
10311   // We can safely assume the conversion won't overflow the output range,
10312   // because (for example) (uint8_t)18293.f is undefined behavior.
10313
10314   // Since we can assume the conversion won't overflow, our decision as to
10315   // whether the input will fit in the float should depend on the minimum
10316   // of the input range and output range.
10317
10318   // This means this is also safe for a signed input and unsigned output, since
10319   // a negative input would lead to undefined behavior.
10320   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10321   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10322   unsigned ActualSize = std::min(InputSize, OutputSize);
10323   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10324
10325   // We can only fold away the float conversion if the input range can be
10326   // represented exactly in the float range.
10327   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10328     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10329       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10330                                                        : ISD::ZERO_EXTEND;
10331       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10332     }
10333     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10334       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10335     return DAG.getBitcast(VT, Src);
10336   }
10337   return SDValue();
10338 }
10339
10340 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10341   SDValue N0 = N->getOperand(0);
10342   EVT VT = N->getValueType(0);
10343
10344   // fold (fp_to_sint c1fp) -> c1
10345   if (isConstantFPBuildVectorOrConstantFP(N0))
10346     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10347
10348   return FoldIntToFPToInt(N, DAG);
10349 }
10350
10351 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10352   SDValue N0 = N->getOperand(0);
10353   EVT VT = N->getValueType(0);
10354
10355   // fold (fp_to_uint c1fp) -> c1
10356   if (isConstantFPBuildVectorOrConstantFP(N0))
10357     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10358
10359   return FoldIntToFPToInt(N, DAG);
10360 }
10361
10362 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10363   SDValue N0 = N->getOperand(0);
10364   SDValue N1 = N->getOperand(1);
10365   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10366   EVT VT = N->getValueType(0);
10367
10368   // fold (fp_round c1fp) -> c1fp
10369   if (N0CFP)
10370     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10371
10372   // fold (fp_round (fp_extend x)) -> x
10373   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10374     return N0.getOperand(0);
10375
10376   // fold (fp_round (fp_round x)) -> (fp_round x)
10377   if (N0.getOpcode() == ISD::FP_ROUND) {
10378     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10379     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10380
10381     // Skip this folding if it results in an fp_round from f80 to f16.
10382     //
10383     // f80 to f16 always generates an expensive (and as yet, unimplemented)
10384     // libcall to __truncxfhf2 instead of selecting native f16 conversion
10385     // instructions from f32 or f64.  Moreover, the first (value-preserving)
10386     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10387     // x86.
10388     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10389       return SDValue();
10390
10391     // If the first fp_round isn't a value preserving truncation, it might
10392     // introduce a tie in the second fp_round, that wouldn't occur in the
10393     // single-step fp_round we want to fold to.
10394     // In other words, double rounding isn't the same as rounding.
10395     // Also, this is a value preserving truncation iff both fp_round's are.
10396     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10397       SDLoc DL(N);
10398       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10399                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10400     }
10401   }
10402
10403   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10404   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10405     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10406                               N0.getOperand(0), N1);
10407     AddToWorklist(Tmp.getNode());
10408     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10409                        Tmp, N0.getOperand(1));
10410   }
10411
10412   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10413     return NewVSel;
10414
10415   return SDValue();
10416 }
10417
10418 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10419   SDValue N0 = N->getOperand(0);
10420   EVT VT = N->getValueType(0);
10421   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10422   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10423
10424   // fold (fp_round_inreg c1fp) -> c1fp
10425   if (N0CFP && isTypeLegal(EVT)) {
10426     SDLoc DL(N);
10427     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10428     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10429   }
10430
10431   return SDValue();
10432 }
10433
10434 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10435   SDValue N0 = N->getOperand(0);
10436   EVT VT = N->getValueType(0);
10437
10438   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10439   if (N->hasOneUse() &&
10440       N->use_begin()->getOpcode() == ISD::FP_ROUND)
10441     return SDValue();
10442
10443   // fold (fp_extend c1fp) -> c1fp
10444   if (isConstantFPBuildVectorOrConstantFP(N0))
10445     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10446
10447   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10448   if (N0.getOpcode() == ISD::FP16_TO_FP &&
10449       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10450     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10451
10452   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10453   // value of X.
10454   if (N0.getOpcode() == ISD::FP_ROUND
10455       && N0.getConstantOperandVal(1) == 1) {
10456     SDValue In = N0.getOperand(0);
10457     if (In.getValueType() == VT) return In;
10458     if (VT.bitsLT(In.getValueType()))
10459       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10460                          In, N0.getOperand(1));
10461     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10462   }
10463
10464   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10465   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10466        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10467     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10468     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10469                                      LN0->getChain(),
10470                                      LN0->getBasePtr(), N0.getValueType(),
10471                                      LN0->getMemOperand());
10472     CombineTo(N, ExtLoad);
10473     CombineTo(N0.getNode(),
10474               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10475                           N0.getValueType(), ExtLoad,
10476                           DAG.getIntPtrConstant(1, SDLoc(N0))),
10477               ExtLoad.getValue(1));
10478     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10479   }
10480
10481   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10482     return NewVSel;
10483
10484   return SDValue();
10485 }
10486
10487 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10488   SDValue N0 = N->getOperand(0);
10489   EVT VT = N->getValueType(0);
10490
10491   // fold (fceil c1) -> fceil(c1)
10492   if (isConstantFPBuildVectorOrConstantFP(N0))
10493     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10494
10495   return SDValue();
10496 }
10497
10498 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10499   SDValue N0 = N->getOperand(0);
10500   EVT VT = N->getValueType(0);
10501
10502   // fold (ftrunc c1) -> ftrunc(c1)
10503   if (isConstantFPBuildVectorOrConstantFP(N0))
10504     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10505
10506   return SDValue();
10507 }
10508
10509 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10510   SDValue N0 = N->getOperand(0);
10511   EVT VT = N->getValueType(0);
10512
10513   // fold (ffloor c1) -> ffloor(c1)
10514   if (isConstantFPBuildVectorOrConstantFP(N0))
10515     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10516
10517   return SDValue();
10518 }
10519
10520 // FIXME: FNEG and FABS have a lot in common; refactor.
10521 SDValue DAGCombiner::visitFNEG(SDNode *N) {
10522   SDValue N0 = N->getOperand(0);
10523   EVT VT = N->getValueType(0);
10524
10525   // Constant fold FNEG.
10526   if (isConstantFPBuildVectorOrConstantFP(N0))
10527     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10528
10529   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10530                          &DAG.getTarget().Options))
10531     return GetNegatedExpression(N0, DAG, LegalOperations);
10532
10533   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10534   // constant pool values.
10535   if (!TLI.isFNegFree(VT) &&
10536       N0.getOpcode() == ISD::BITCAST &&
10537       N0.getNode()->hasOneUse()) {
10538     SDValue Int = N0.getOperand(0);
10539     EVT IntVT = Int.getValueType();
10540     if (IntVT.isInteger() && !IntVT.isVector()) {
10541       APInt SignMask;
10542       if (N0.getValueType().isVector()) {
10543         // For a vector, get a mask such as 0x80... per scalar element
10544         // and splat it.
10545         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
10546         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10547       } else {
10548         // For a scalar, just generate 0x80...
10549         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
10550       }
10551       SDLoc DL0(N0);
10552       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10553                         DAG.getConstant(SignMask, DL0, IntVT));
10554       AddToWorklist(Int.getNode());
10555       return DAG.getBitcast(VT, Int);
10556     }
10557   }
10558
10559   // (fneg (fmul c, x)) -> (fmul -c, x)
10560   if (N0.getOpcode() == ISD::FMUL &&
10561       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10562     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10563     if (CFP1) {
10564       APFloat CVal = CFP1->getValueAPF();
10565       CVal.changeSign();
10566       if (Level >= AfterLegalizeDAG &&
10567           (TLI.isFPImmLegal(CVal, VT) ||
10568            TLI.isOperationLegal(ISD::ConstantFP, VT)))
10569         return DAG.getNode(
10570             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10571             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
10572             N0->getFlags());
10573     }
10574   }
10575
10576   return SDValue();
10577 }
10578
10579 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10580   SDValue N0 = N->getOperand(0);
10581   SDValue N1 = N->getOperand(1);
10582   EVT VT = N->getValueType(0);
10583   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10584   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10585
10586   if (N0CFP && N1CFP) {
10587     const APFloat &C0 = N0CFP->getValueAPF();
10588     const APFloat &C1 = N1CFP->getValueAPF();
10589     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10590   }
10591
10592   // Canonicalize to constant on RHS.
10593   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10594      !isConstantFPBuildVectorOrConstantFP(N1))
10595     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10596
10597   return SDValue();
10598 }
10599
10600 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10601   SDValue N0 = N->getOperand(0);
10602   SDValue N1 = N->getOperand(1);
10603   EVT VT = N->getValueType(0);
10604   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10605   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10606
10607   if (N0CFP && N1CFP) {
10608     const APFloat &C0 = N0CFP->getValueAPF();
10609     const APFloat &C1 = N1CFP->getValueAPF();
10610     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10611   }
10612
10613   // Canonicalize to constant on RHS.
10614   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10615      !isConstantFPBuildVectorOrConstantFP(N1))
10616     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10617
10618   return SDValue();
10619 }
10620
10621 SDValue DAGCombiner::visitFABS(SDNode *N) {
10622   SDValue N0 = N->getOperand(0);
10623   EVT VT = N->getValueType(0);
10624
10625   // fold (fabs c1) -> fabs(c1)
10626   if (isConstantFPBuildVectorOrConstantFP(N0))
10627     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10628
10629   // fold (fabs (fabs x)) -> (fabs x)
10630   if (N0.getOpcode() == ISD::FABS)
10631     return N->getOperand(0);
10632
10633   // fold (fabs (fneg x)) -> (fabs x)
10634   // fold (fabs (fcopysign x, y)) -> (fabs x)
10635   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10636     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10637
10638   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10639   // constant pool values.
10640   if (!TLI.isFAbsFree(VT) &&
10641       N0.getOpcode() == ISD::BITCAST &&
10642       N0.getNode()->hasOneUse()) {
10643     SDValue Int = N0.getOperand(0);
10644     EVT IntVT = Int.getValueType();
10645     if (IntVT.isInteger() && !IntVT.isVector()) {
10646       APInt SignMask;
10647       if (N0.getValueType().isVector()) {
10648         // For a vector, get a mask such as 0x7f... per scalar element
10649         // and splat it.
10650         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10651         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10652       } else {
10653         // For a scalar, just generate 0x7f...
10654         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10655       }
10656       SDLoc DL(N0);
10657       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10658                         DAG.getConstant(SignMask, DL, IntVT));
10659       AddToWorklist(Int.getNode());
10660       return DAG.getBitcast(N->getValueType(0), Int);
10661     }
10662   }
10663
10664   return SDValue();
10665 }
10666
10667 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
10668   SDValue Chain = N->getOperand(0);
10669   SDValue N1 = N->getOperand(1);
10670   SDValue N2 = N->getOperand(2);
10671
10672   // If N is a constant we could fold this into a fallthrough or unconditional
10673   // branch. However that doesn't happen very often in normal code, because
10674   // Instcombine/SimplifyCFG should have handled the available opportunities.
10675   // If we did this folding here, it would be necessary to update the
10676   // MachineBasicBlock CFG, which is awkward.
10677
10678   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
10679   // on the target.
10680   if (N1.getOpcode() == ISD::SETCC &&
10681       TLI.isOperationLegalOrCustom(ISD::BR_CC,
10682                                    N1.getOperand(0).getValueType())) {
10683     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10684                        Chain, N1.getOperand(2),
10685                        N1.getOperand(0), N1.getOperand(1), N2);
10686   }
10687
10688   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
10689       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
10690        (N1.getOperand(0).hasOneUse() &&
10691         N1.getOperand(0).getOpcode() == ISD::SRL))) {
10692     SDNode *Trunc = nullptr;
10693     if (N1.getOpcode() == ISD::TRUNCATE) {
10694       // Look pass the truncate.
10695       Trunc = N1.getNode();
10696       N1 = N1.getOperand(0);
10697     }
10698
10699     // Match this pattern so that we can generate simpler code:
10700     //
10701     //   %a = ...
10702     //   %b = and i32 %a, 2
10703     //   %c = srl i32 %b, 1
10704     //   brcond i32 %c ...
10705     //
10706     // into
10707     //
10708     //   %a = ...
10709     //   %b = and i32 %a, 2
10710     //   %c = setcc eq %b, 0
10711     //   brcond %c ...
10712     //
10713     // This applies only when the AND constant value has one bit set and the
10714     // SRL constant is equal to the log2 of the AND constant. The back-end is
10715     // smart enough to convert the result into a TEST/JMP sequence.
10716     SDValue Op0 = N1.getOperand(0);
10717     SDValue Op1 = N1.getOperand(1);
10718
10719     if (Op0.getOpcode() == ISD::AND &&
10720         Op1.getOpcode() == ISD::Constant) {
10721       SDValue AndOp1 = Op0.getOperand(1);
10722
10723       if (AndOp1.getOpcode() == ISD::Constant) {
10724         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
10725
10726         if (AndConst.isPowerOf2() &&
10727             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
10728           SDLoc DL(N);
10729           SDValue SetCC =
10730             DAG.getSetCC(DL,
10731                          getSetCCResultType(Op0.getValueType()),
10732                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
10733                          ISD::SETNE);
10734
10735           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
10736                                           MVT::Other, Chain, SetCC, N2);
10737           // Don't add the new BRCond into the worklist or else SimplifySelectCC
10738           // will convert it back to (X & C1) >> C2.
10739           CombineTo(N, NewBRCond, false);
10740           // Truncate is dead.
10741           if (Trunc)
10742             deleteAndRecombine(Trunc);
10743           // Replace the uses of SRL with SETCC
10744           WorklistRemover DeadNodes(*this);
10745           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10746           deleteAndRecombine(N1.getNode());
10747           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10748         }
10749       }
10750     }
10751
10752     if (Trunc)
10753       // Restore N1 if the above transformation doesn't match.
10754       N1 = N->getOperand(1);
10755   }
10756
10757   // Transform br(xor(x, y)) -> br(x != y)
10758   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
10759   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
10760     SDNode *TheXor = N1.getNode();
10761     SDValue Op0 = TheXor->getOperand(0);
10762     SDValue Op1 = TheXor->getOperand(1);
10763     if (Op0.getOpcode() == Op1.getOpcode()) {
10764       // Avoid missing important xor optimizations.
10765       if (SDValue Tmp = visitXOR(TheXor)) {
10766         if (Tmp.getNode() != TheXor) {
10767           DEBUG(dbgs() << "\nReplacing.8 ";
10768                 TheXor->dump(&DAG);
10769                 dbgs() << "\nWith: ";
10770                 Tmp.getNode()->dump(&DAG);
10771                 dbgs() << '\n');
10772           WorklistRemover DeadNodes(*this);
10773           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
10774           deleteAndRecombine(TheXor);
10775           return DAG.getNode(ISD::BRCOND, SDLoc(N),
10776                              MVT::Other, Chain, Tmp, N2);
10777         }
10778
10779         // visitXOR has changed XOR's operands or replaced the XOR completely,
10780         // bail out.
10781         return SDValue(N, 0);
10782       }
10783     }
10784
10785     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
10786       bool Equal = false;
10787       if (isOneConstant(Op0) && Op0.hasOneUse() &&
10788           Op0.getOpcode() == ISD::XOR) {
10789         TheXor = Op0.getNode();
10790         Equal = true;
10791       }
10792
10793       EVT SetCCVT = N1.getValueType();
10794       if (LegalTypes)
10795         SetCCVT = getSetCCResultType(SetCCVT);
10796       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
10797                                    SetCCVT,
10798                                    Op0, Op1,
10799                                    Equal ? ISD::SETEQ : ISD::SETNE);
10800       // Replace the uses of XOR with SETCC
10801       WorklistRemover DeadNodes(*this);
10802       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10803       deleteAndRecombine(N1.getNode());
10804       return DAG.getNode(ISD::BRCOND, SDLoc(N),
10805                          MVT::Other, Chain, SetCC, N2);
10806     }
10807   }
10808
10809   return SDValue();
10810 }
10811
10812 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10813 //
10814 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10815   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10816   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10817
10818   // If N is a constant we could fold this into a fallthrough or unconditional
10819   // branch. However that doesn't happen very often in normal code, because
10820   // Instcombine/SimplifyCFG should have handled the available opportunities.
10821   // If we did this folding here, it would be necessary to update the
10822   // MachineBasicBlock CFG, which is awkward.
10823
10824   // Use SimplifySetCC to simplify SETCC's.
10825   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10826                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10827                                false);
10828   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10829
10830   // fold to a simpler setcc
10831   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
10832     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10833                        N->getOperand(0), Simp.getOperand(2),
10834                        Simp.getOperand(0), Simp.getOperand(1),
10835                        N->getOperand(4));
10836
10837   return SDValue();
10838 }
10839
10840 /// Return true if 'Use' is a load or a store that uses N as its base pointer
10841 /// and that N may be folded in the load / store addressing mode.
10842 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
10843                                     SelectionDAG &DAG,
10844                                     const TargetLowering &TLI) {
10845   EVT VT;
10846   unsigned AS;
10847
10848   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
10849     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
10850       return false;
10851     VT = LD->getMemoryVT();
10852     AS = LD->getAddressSpace();
10853   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
10854     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
10855       return false;
10856     VT = ST->getMemoryVT();
10857     AS = ST->getAddressSpace();
10858   } else
10859     return false;
10860
10861   TargetLowering::AddrMode AM;
10862   if (N->getOpcode() == ISD::ADD) {
10863     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10864     if (Offset)
10865       // [reg +/- imm]
10866       AM.BaseOffs = Offset->getSExtValue();
10867     else
10868       // [reg +/- reg]
10869       AM.Scale = 1;
10870   } else if (N->getOpcode() == ISD::SUB) {
10871     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10872     if (Offset)
10873       // [reg +/- imm]
10874       AM.BaseOffs = -Offset->getSExtValue();
10875     else
10876       // [reg +/- reg]
10877       AM.Scale = 1;
10878   } else
10879     return false;
10880
10881   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
10882                                    VT.getTypeForEVT(*DAG.getContext()), AS);
10883 }
10884
10885 /// Try turning a load/store into a pre-indexed load/store when the base
10886 /// pointer is an add or subtract and it has other uses besides the load/store.
10887 /// After the transformation, the new indexed load/store has effectively folded
10888 /// the add/subtract in and all of its other uses are redirected to the
10889 /// new load/store.
10890 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
10891   if (Level < AfterLegalizeDAG)
10892     return false;
10893
10894   bool isLoad = true;
10895   SDValue Ptr;
10896   EVT VT;
10897   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10898     if (LD->isIndexed())
10899       return false;
10900     VT = LD->getMemoryVT();
10901     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
10902         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
10903       return false;
10904     Ptr = LD->getBasePtr();
10905   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10906     if (ST->isIndexed())
10907       return false;
10908     VT = ST->getMemoryVT();
10909     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
10910         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
10911       return false;
10912     Ptr = ST->getBasePtr();
10913     isLoad = false;
10914   } else {
10915     return false;
10916   }
10917
10918   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
10919   // out.  There is no reason to make this a preinc/predec.
10920   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
10921       Ptr.getNode()->hasOneUse())
10922     return false;
10923
10924   // Ask the target to do addressing mode selection.
10925   SDValue BasePtr;
10926   SDValue Offset;
10927   ISD::MemIndexedMode AM = ISD::UNINDEXED;
10928   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
10929     return false;
10930
10931   // Backends without true r+i pre-indexed forms may need to pass a
10932   // constant base with a variable offset so that constant coercion
10933   // will work with the patterns in canonical form.
10934   bool Swapped = false;
10935   if (isa<ConstantSDNode>(BasePtr)) {
10936     std::swap(BasePtr, Offset);
10937     Swapped = true;
10938   }
10939
10940   // Don't create a indexed load / store with zero offset.
10941   if (isNullConstant(Offset))
10942     return false;
10943
10944   // Try turning it into a pre-indexed load / store except when:
10945   // 1) The new base ptr is a frame index.
10946   // 2) If N is a store and the new base ptr is either the same as or is a
10947   //    predecessor of the value being stored.
10948   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
10949   //    that would create a cycle.
10950   // 4) All uses are load / store ops that use it as old base ptr.
10951
10952   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
10953   // (plus the implicit offset) to a register to preinc anyway.
10954   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10955     return false;
10956
10957   // Check #2.
10958   if (!isLoad) {
10959     SDValue Val = cast<StoreSDNode>(N)->getValue();
10960     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
10961       return false;
10962   }
10963
10964   // Caches for hasPredecessorHelper.
10965   SmallPtrSet<const SDNode *, 32> Visited;
10966   SmallVector<const SDNode *, 16> Worklist;
10967   Worklist.push_back(N);
10968
10969   // If the offset is a constant, there may be other adds of constants that
10970   // can be folded with this one. We should do this to avoid having to keep
10971   // a copy of the original base pointer.
10972   SmallVector<SDNode *, 16> OtherUses;
10973   if (isa<ConstantSDNode>(Offset))
10974     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
10975                               UE = BasePtr.getNode()->use_end();
10976          UI != UE; ++UI) {
10977       SDUse &Use = UI.getUse();
10978       // Skip the use that is Ptr and uses of other results from BasePtr's
10979       // node (important for nodes that return multiple results).
10980       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
10981         continue;
10982
10983       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
10984         continue;
10985
10986       if (Use.getUser()->getOpcode() != ISD::ADD &&
10987           Use.getUser()->getOpcode() != ISD::SUB) {
10988         OtherUses.clear();
10989         break;
10990       }
10991
10992       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
10993       if (!isa<ConstantSDNode>(Op1)) {
10994         OtherUses.clear();
10995         break;
10996       }
10997
10998       // FIXME: In some cases, we can be smarter about this.
10999       if (Op1.getValueType() != Offset.getValueType()) {
11000         OtherUses.clear();
11001         break;
11002       }
11003
11004       OtherUses.push_back(Use.getUser());
11005     }
11006
11007   if (Swapped)
11008     std::swap(BasePtr, Offset);
11009
11010   // Now check for #3 and #4.
11011   bool RealUse = false;
11012
11013   for (SDNode *Use : Ptr.getNode()->uses()) {
11014     if (Use == N)
11015       continue;
11016     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
11017       return false;
11018
11019     // If Ptr may be folded in addressing mode of other use, then it's
11020     // not profitable to do this transformation.
11021     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
11022       RealUse = true;
11023   }
11024
11025   if (!RealUse)
11026     return false;
11027
11028   SDValue Result;
11029   if (isLoad)
11030     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11031                                 BasePtr, Offset, AM);
11032   else
11033     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11034                                  BasePtr, Offset, AM);
11035   ++PreIndexedNodes;
11036   ++NodesCombined;
11037   DEBUG(dbgs() << "\nReplacing.4 ";
11038         N->dump(&DAG);
11039         dbgs() << "\nWith: ";
11040         Result.getNode()->dump(&DAG);
11041         dbgs() << '\n');
11042   WorklistRemover DeadNodes(*this);
11043   if (isLoad) {
11044     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11045     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11046   } else {
11047     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11048   }
11049
11050   // Finally, since the node is now dead, remove it from the graph.
11051   deleteAndRecombine(N);
11052
11053   if (Swapped)
11054     std::swap(BasePtr, Offset);
11055
11056   // Replace other uses of BasePtr that can be updated to use Ptr
11057   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
11058     unsigned OffsetIdx = 1;
11059     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
11060       OffsetIdx = 0;
11061     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
11062            BasePtr.getNode() && "Expected BasePtr operand");
11063
11064     // We need to replace ptr0 in the following expression:
11065     //   x0 * offset0 + y0 * ptr0 = t0
11066     // knowing that
11067     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
11068     //
11069     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
11070     // indexed load/store and the expression that needs to be re-written.
11071     //
11072     // Therefore, we have:
11073     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
11074
11075     ConstantSDNode *CN =
11076       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
11077     int X0, X1, Y0, Y1;
11078     const APInt &Offset0 = CN->getAPIntValue();
11079     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
11080
11081     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
11082     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
11083     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
11084     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
11085
11086     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
11087
11088     APInt CNV = Offset0;
11089     if (X0 < 0) CNV = -CNV;
11090     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
11091     else CNV = CNV - Offset1;
11092
11093     SDLoc DL(OtherUses[i]);
11094
11095     // We can now generate the new expression.
11096     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
11097     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
11098
11099     SDValue NewUse = DAG.getNode(Opcode,
11100                                  DL,
11101                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
11102     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
11103     deleteAndRecombine(OtherUses[i]);
11104   }
11105
11106   // Replace the uses of Ptr with uses of the updated base value.
11107   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
11108   deleteAndRecombine(Ptr.getNode());
11109
11110   return true;
11111 }
11112
11113 /// Try to combine a load/store with a add/sub of the base pointer node into a
11114 /// post-indexed load/store. The transformation folded the add/subtract into the
11115 /// new indexed load/store effectively and all of its uses are redirected to the
11116 /// new load/store.
11117 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
11118   if (Level < AfterLegalizeDAG)
11119     return false;
11120
11121   bool isLoad = true;
11122   SDValue Ptr;
11123   EVT VT;
11124   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
11125     if (LD->isIndexed())
11126       return false;
11127     VT = LD->getMemoryVT();
11128     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
11129         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
11130       return false;
11131     Ptr = LD->getBasePtr();
11132   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
11133     if (ST->isIndexed())
11134       return false;
11135     VT = ST->getMemoryVT();
11136     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
11137         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
11138       return false;
11139     Ptr = ST->getBasePtr();
11140     isLoad = false;
11141   } else {
11142     return false;
11143   }
11144
11145   if (Ptr.getNode()->hasOneUse())
11146     return false;
11147
11148   for (SDNode *Op : Ptr.getNode()->uses()) {
11149     if (Op == N ||
11150         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
11151       continue;
11152
11153     SDValue BasePtr;
11154     SDValue Offset;
11155     ISD::MemIndexedMode AM = ISD::UNINDEXED;
11156     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
11157       // Don't create a indexed load / store with zero offset.
11158       if (isNullConstant(Offset))
11159         continue;
11160
11161       // Try turning it into a post-indexed load / store except when
11162       // 1) All uses are load / store ops that use it as base ptr (and
11163       //    it may be folded as addressing mmode).
11164       // 2) Op must be independent of N, i.e. Op is neither a predecessor
11165       //    nor a successor of N. Otherwise, if Op is folded that would
11166       //    create a cycle.
11167
11168       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
11169         continue;
11170
11171       // Check for #1.
11172       bool TryNext = false;
11173       for (SDNode *Use : BasePtr.getNode()->uses()) {
11174         if (Use == Ptr.getNode())
11175           continue;
11176
11177         // If all the uses are load / store addresses, then don't do the
11178         // transformation.
11179         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
11180           bool RealUse = false;
11181           for (SDNode *UseUse : Use->uses()) {
11182             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
11183               RealUse = true;
11184           }
11185
11186           if (!RealUse) {
11187             TryNext = true;
11188             break;
11189           }
11190         }
11191       }
11192
11193       if (TryNext)
11194         continue;
11195
11196       // Check for #2
11197       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
11198         SDValue Result = isLoad
11199           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11200                                BasePtr, Offset, AM)
11201           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11202                                 BasePtr, Offset, AM);
11203         ++PostIndexedNodes;
11204         ++NodesCombined;
11205         DEBUG(dbgs() << "\nReplacing.5 ";
11206               N->dump(&DAG);
11207               dbgs() << "\nWith: ";
11208               Result.getNode()->dump(&DAG);
11209               dbgs() << '\n');
11210         WorklistRemover DeadNodes(*this);
11211         if (isLoad) {
11212           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11213           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11214         } else {
11215           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11216         }
11217
11218         // Finally, since the node is now dead, remove it from the graph.
11219         deleteAndRecombine(N);
11220
11221         // Replace the uses of Use with uses of the updated base value.
11222         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
11223                                       Result.getValue(isLoad ? 1 : 0));
11224         deleteAndRecombine(Op);
11225         return true;
11226       }
11227     }
11228   }
11229
11230   return false;
11231 }
11232
11233 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11234 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11235   ISD::MemIndexedMode AM = LD->getAddressingMode();
11236   assert(AM != ISD::UNINDEXED);
11237   SDValue BP = LD->getOperand(1);
11238   SDValue Inc = LD->getOperand(2);
11239
11240   // Some backends use TargetConstants for load offsets, but don't expect
11241   // TargetConstants in general ADD nodes. We can convert these constants into
11242   // regular Constants (if the constant is not opaque).
11243   assert((Inc.getOpcode() != ISD::TargetConstant ||
11244           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11245          "Cannot split out indexing using opaque target constants");
11246   if (Inc.getOpcode() == ISD::TargetConstant) {
11247     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11248     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11249                           ConstInc->getValueType(0));
11250   }
11251
11252   unsigned Opc =
11253       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11254   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11255 }
11256
11257 SDValue DAGCombiner::visitLOAD(SDNode *N) {
11258   LoadSDNode *LD  = cast<LoadSDNode>(N);
11259   SDValue Chain = LD->getChain();
11260   SDValue Ptr   = LD->getBasePtr();
11261
11262   // If load is not volatile and there are no uses of the loaded value (and
11263   // the updated indexed value in case of indexed loads), change uses of the
11264   // chain value into uses of the chain input (i.e. delete the dead load).
11265   if (!LD->isVolatile()) {
11266     if (N->getValueType(1) == MVT::Other) {
11267       // Unindexed loads.
11268       if (!N->hasAnyUseOfValue(0)) {
11269         // It's not safe to use the two value CombineTo variant here. e.g.
11270         // v1, chain2 = load chain1, loc
11271         // v2, chain3 = load chain2, loc
11272         // v3         = add v2, c
11273         // Now we replace use of chain2 with chain1.  This makes the second load
11274         // isomorphic to the one we are deleting, and thus makes this load live.
11275         DEBUG(dbgs() << "\nReplacing.6 ";
11276               N->dump(&DAG);
11277               dbgs() << "\nWith chain: ";
11278               Chain.getNode()->dump(&DAG);
11279               dbgs() << "\n");
11280         WorklistRemover DeadNodes(*this);
11281         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11282         AddUsersToWorklist(Chain.getNode());
11283         if (N->use_empty())
11284           deleteAndRecombine(N);
11285
11286         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11287       }
11288     } else {
11289       // Indexed loads.
11290       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11291
11292       // If this load has an opaque TargetConstant offset, then we cannot split
11293       // the indexing into an add/sub directly (that TargetConstant may not be
11294       // valid for a different type of node, and we cannot convert an opaque
11295       // target constant into a regular constant).
11296       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11297                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11298
11299       if (!N->hasAnyUseOfValue(0) &&
11300           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
11301         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11302         SDValue Index;
11303         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11304           Index = SplitIndexingFromLoad(LD);
11305           // Try to fold the base pointer arithmetic into subsequent loads and
11306           // stores.
11307           AddUsersToWorklist(N);
11308         } else
11309           Index = DAG.getUNDEF(N->getValueType(1));
11310         DEBUG(dbgs() << "\nReplacing.7 ";
11311               N->dump(&DAG);
11312               dbgs() << "\nWith: ";
11313               Undef.getNode()->dump(&DAG);
11314               dbgs() << " and 2 other values\n");
11315         WorklistRemover DeadNodes(*this);
11316         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11317         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11318         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11319         deleteAndRecombine(N);
11320         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11321       }
11322     }
11323   }
11324
11325   // If this load is directly stored, replace the load value with the stored
11326   // value.
11327   // TODO: Handle store large -> read small portion.
11328   // TODO: Handle TRUNCSTORE/LOADEXT
11329   if (OptLevel != CodeGenOpt::None &&
11330       ISD::isNormalLoad(N) && !LD->isVolatile()) {
11331     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11332       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11333       if (PrevST->getBasePtr() == Ptr &&
11334           PrevST->getValue().getValueType() == N->getValueType(0))
11335         return CombineTo(N, PrevST->getOperand(1), Chain);
11336     }
11337   }
11338
11339   // Try to infer better alignment information than the load already has.
11340   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
11341     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11342       if (Align > LD->getMemOperand()->getBaseAlignment()) {
11343         SDValue NewLoad = DAG.getExtLoad(
11344             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11345             LD->getPointerInfo(), LD->getMemoryVT(), Align,
11346             LD->getMemOperand()->getFlags(), LD->getAAInfo());
11347         if (NewLoad.getNode() != N)
11348           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11349       }
11350     }
11351   }
11352
11353   if (LD->isUnindexed()) {
11354     // Walk up chain skipping non-aliasing memory nodes.
11355     SDValue BetterChain = FindBetterChain(N, Chain);
11356
11357     // If there is a better chain.
11358     if (Chain != BetterChain) {
11359       SDValue ReplLoad;
11360
11361       // Replace the chain to void dependency.
11362       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11363         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11364                                BetterChain, Ptr, LD->getMemOperand());
11365       } else {
11366         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11367                                   LD->getValueType(0),
11368                                   BetterChain, Ptr, LD->getMemoryVT(),
11369                                   LD->getMemOperand());
11370       }
11371
11372       // Create token factor to keep old chain connected.
11373       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11374                                   MVT::Other, Chain, ReplLoad.getValue(1));
11375
11376       // Make sure the new and old chains are cleaned up.
11377       AddToWorklist(Token.getNode());
11378
11379       // Replace uses with load result and token factor. Don't add users
11380       // to work list.
11381       return CombineTo(N, ReplLoad.getValue(0), Token, false);
11382     }
11383   }
11384
11385   // Try transforming N to an indexed load.
11386   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11387     return SDValue(N, 0);
11388
11389   // Try to slice up N to more direct loads if the slices are mapped to
11390   // different register banks or pairing can take place.
11391   if (SliceUpLoad(N))
11392     return SDValue(N, 0);
11393
11394   return SDValue();
11395 }
11396
11397 namespace {
11398 /// \brief Helper structure used to slice a load in smaller loads.
11399 /// Basically a slice is obtained from the following sequence:
11400 /// Origin = load Ty1, Base
11401 /// Shift = srl Ty1 Origin, CstTy Amount
11402 /// Inst = trunc Shift to Ty2
11403 ///
11404 /// Then, it will be rewritten into:
11405 /// Slice = load SliceTy, Base + SliceOffset
11406 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11407 ///
11408 /// SliceTy is deduced from the number of bits that are actually used to
11409 /// build Inst.
11410 struct LoadedSlice {
11411   /// \brief Helper structure used to compute the cost of a slice.
11412   struct Cost {
11413     /// Are we optimizing for code size.
11414     bool ForCodeSize;
11415     /// Various cost.
11416     unsigned Loads;
11417     unsigned Truncates;
11418     unsigned CrossRegisterBanksCopies;
11419     unsigned ZExts;
11420     unsigned Shift;
11421
11422     Cost(bool ForCodeSize = false)
11423         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
11424           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
11425
11426     /// \brief Get the cost of one isolated slice.
11427     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11428         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
11429           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
11430       EVT TruncType = LS.Inst->getValueType(0);
11431       EVT LoadedType = LS.getLoadedType();
11432       if (TruncType != LoadedType &&
11433           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11434         ZExts = 1;
11435     }
11436
11437     /// \brief Account for slicing gain in the current cost.
11438     /// Slicing provide a few gains like removing a shift or a
11439     /// truncate. This method allows to grow the cost of the original
11440     /// load with the gain from this slice.
11441     void addSliceGain(const LoadedSlice &LS) {
11442       // Each slice saves a truncate.
11443       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11444       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11445                               LS.Inst->getValueType(0)))
11446         ++Truncates;
11447       // If there is a shift amount, this slice gets rid of it.
11448       if (LS.Shift)
11449         ++Shift;
11450       // If this slice can merge a cross register bank copy, account for it.
11451       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11452         ++CrossRegisterBanksCopies;
11453     }
11454
11455     Cost &operator+=(const Cost &RHS) {
11456       Loads += RHS.Loads;
11457       Truncates += RHS.Truncates;
11458       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11459       ZExts += RHS.ZExts;
11460       Shift += RHS.Shift;
11461       return *this;
11462     }
11463
11464     bool operator==(const Cost &RHS) const {
11465       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11466              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11467              ZExts == RHS.ZExts && Shift == RHS.Shift;
11468     }
11469
11470     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11471
11472     bool operator<(const Cost &RHS) const {
11473       // Assume cross register banks copies are as expensive as loads.
11474       // FIXME: Do we want some more target hooks?
11475       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11476       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11477       // Unless we are optimizing for code size, consider the
11478       // expensive operation first.
11479       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11480         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11481       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11482              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11483     }
11484
11485     bool operator>(const Cost &RHS) const { return RHS < *this; }
11486
11487     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11488
11489     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11490   };
11491   // The last instruction that represent the slice. This should be a
11492   // truncate instruction.
11493   SDNode *Inst;
11494   // The original load instruction.
11495   LoadSDNode *Origin;
11496   // The right shift amount in bits from the original load.
11497   unsigned Shift;
11498   // The DAG from which Origin came from.
11499   // This is used to get some contextual information about legal types, etc.
11500   SelectionDAG *DAG;
11501
11502   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11503               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11504       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11505
11506   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11507   /// \return Result is \p BitWidth and has used bits set to 1 and
11508   ///         not used bits set to 0.
11509   APInt getUsedBits() const {
11510     // Reproduce the trunc(lshr) sequence:
11511     // - Start from the truncated value.
11512     // - Zero extend to the desired bit width.
11513     // - Shift left.
11514     assert(Origin && "No original load to compare against.");
11515     unsigned BitWidth = Origin->getValueSizeInBits(0);
11516     assert(Inst && "This slice is not bound to an instruction");
11517     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11518            "Extracted slice is bigger than the whole type!");
11519     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11520     UsedBits.setAllBits();
11521     UsedBits = UsedBits.zext(BitWidth);
11522     UsedBits <<= Shift;
11523     return UsedBits;
11524   }
11525
11526   /// \brief Get the size of the slice to be loaded in bytes.
11527   unsigned getLoadedSize() const {
11528     unsigned SliceSize = getUsedBits().countPopulation();
11529     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11530     return SliceSize / 8;
11531   }
11532
11533   /// \brief Get the type that will be loaded for this slice.
11534   /// Note: This may not be the final type for the slice.
11535   EVT getLoadedType() const {
11536     assert(DAG && "Missing context");
11537     LLVMContext &Ctxt = *DAG->getContext();
11538     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11539   }
11540
11541   /// \brief Get the alignment of the load used for this slice.
11542   unsigned getAlignment() const {
11543     unsigned Alignment = Origin->getAlignment();
11544     unsigned Offset = getOffsetFromBase();
11545     if (Offset != 0)
11546       Alignment = MinAlign(Alignment, Alignment + Offset);
11547     return Alignment;
11548   }
11549
11550   /// \brief Check if this slice can be rewritten with legal operations.
11551   bool isLegal() const {
11552     // An invalid slice is not legal.
11553     if (!Origin || !Inst || !DAG)
11554       return false;
11555
11556     // Offsets are for indexed load only, we do not handle that.
11557     if (!Origin->getOffset().isUndef())
11558       return false;
11559
11560     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11561
11562     // Check that the type is legal.
11563     EVT SliceType = getLoadedType();
11564     if (!TLI.isTypeLegal(SliceType))
11565       return false;
11566
11567     // Check that the load is legal for this type.
11568     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11569       return false;
11570
11571     // Check that the offset can be computed.
11572     // 1. Check its type.
11573     EVT PtrType = Origin->getBasePtr().getValueType();
11574     if (PtrType == MVT::Untyped || PtrType.isExtended())
11575       return false;
11576
11577     // 2. Check that it fits in the immediate.
11578     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11579       return false;
11580
11581     // 3. Check that the computation is legal.
11582     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11583       return false;
11584
11585     // Check that the zext is legal if it needs one.
11586     EVT TruncateType = Inst->getValueType(0);
11587     if (TruncateType != SliceType &&
11588         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11589       return false;
11590
11591     return true;
11592   }
11593
11594   /// \brief Get the offset in bytes of this slice in the original chunk of
11595   /// bits.
11596   /// \pre DAG != nullptr.
11597   uint64_t getOffsetFromBase() const {
11598     assert(DAG && "Missing context.");
11599     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11600     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11601     uint64_t Offset = Shift / 8;
11602     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11603     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11604            "The size of the original loaded type is not a multiple of a"
11605            " byte.");
11606     // If Offset is bigger than TySizeInBytes, it means we are loading all
11607     // zeros. This should have been optimized before in the process.
11608     assert(TySizeInBytes > Offset &&
11609            "Invalid shift amount for given loaded size");
11610     if (IsBigEndian)
11611       Offset = TySizeInBytes - Offset - getLoadedSize();
11612     return Offset;
11613   }
11614
11615   /// \brief Generate the sequence of instructions to load the slice
11616   /// represented by this object and redirect the uses of this slice to
11617   /// this new sequence of instructions.
11618   /// \pre this->Inst && this->Origin are valid Instructions and this
11619   /// object passed the legal check: LoadedSlice::isLegal returned true.
11620   /// \return The last instruction of the sequence used to load the slice.
11621   SDValue loadSlice() const {
11622     assert(Inst && Origin && "Unable to replace a non-existing slice.");
11623     const SDValue &OldBaseAddr = Origin->getBasePtr();
11624     SDValue BaseAddr = OldBaseAddr;
11625     // Get the offset in that chunk of bytes w.r.t. the endianness.
11626     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11627     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11628     if (Offset) {
11629       // BaseAddr = BaseAddr + Offset.
11630       EVT ArithType = BaseAddr.getValueType();
11631       SDLoc DL(Origin);
11632       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11633                               DAG->getConstant(Offset, DL, ArithType));
11634     }
11635
11636     // Create the type of the loaded slice according to its size.
11637     EVT SliceType = getLoadedType();
11638
11639     // Create the load for the slice.
11640     SDValue LastInst =
11641         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11642                      Origin->getPointerInfo().getWithOffset(Offset),
11643                      getAlignment(), Origin->getMemOperand()->getFlags());
11644     // If the final type is not the same as the loaded type, this means that
11645     // we have to pad with zero. Create a zero extend for that.
11646     EVT FinalType = Inst->getValueType(0);
11647     if (SliceType != FinalType)
11648       LastInst =
11649           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11650     return LastInst;
11651   }
11652
11653   /// \brief Check if this slice can be merged with an expensive cross register
11654   /// bank copy. E.g.,
11655   /// i = load i32
11656   /// f = bitcast i32 i to float
11657   bool canMergeExpensiveCrossRegisterBankCopy() const {
11658     if (!Inst || !Inst->hasOneUse())
11659       return false;
11660     SDNode *Use = *Inst->use_begin();
11661     if (Use->getOpcode() != ISD::BITCAST)
11662       return false;
11663     assert(DAG && "Missing context");
11664     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11665     EVT ResVT = Use->getValueType(0);
11666     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11667     const TargetRegisterClass *ArgRC =
11668         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11669     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11670       return false;
11671
11672     // At this point, we know that we perform a cross-register-bank copy.
11673     // Check if it is expensive.
11674     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11675     // Assume bitcasts are cheap, unless both register classes do not
11676     // explicitly share a common sub class.
11677     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11678       return false;
11679
11680     // Check if it will be merged with the load.
11681     // 1. Check the alignment constraint.
11682     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11683         ResVT.getTypeForEVT(*DAG->getContext()));
11684
11685     if (RequiredAlignment > getAlignment())
11686       return false;
11687
11688     // 2. Check that the load is a legal operation for that type.
11689     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11690       return false;
11691
11692     // 3. Check that we do not have a zext in the way.
11693     if (Inst->getValueType(0) != getLoadedType())
11694       return false;
11695
11696     return true;
11697   }
11698 };
11699 }
11700
11701 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11702 /// \p UsedBits looks like 0..0 1..1 0..0.
11703 static bool areUsedBitsDense(const APInt &UsedBits) {
11704   // If all the bits are one, this is dense!
11705   if (UsedBits.isAllOnesValue())
11706     return true;
11707
11708   // Get rid of the unused bits on the right.
11709   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11710   // Get rid of the unused bits on the left.
11711   if (NarrowedUsedBits.countLeadingZeros())
11712     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11713   // Check that the chunk of bits is completely used.
11714   return NarrowedUsedBits.isAllOnesValue();
11715 }
11716
11717 /// \brief Check whether or not \p First and \p Second are next to each other
11718 /// in memory. This means that there is no hole between the bits loaded
11719 /// by \p First and the bits loaded by \p Second.
11720 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11721                                      const LoadedSlice &Second) {
11722   assert(First.Origin == Second.Origin && First.Origin &&
11723          "Unable to match different memory origins.");
11724   APInt UsedBits = First.getUsedBits();
11725   assert((UsedBits & Second.getUsedBits()) == 0 &&
11726          "Slices are not supposed to overlap.");
11727   UsedBits |= Second.getUsedBits();
11728   return areUsedBitsDense(UsedBits);
11729 }
11730
11731 /// \brief Adjust the \p GlobalLSCost according to the target
11732 /// paring capabilities and the layout of the slices.
11733 /// \pre \p GlobalLSCost should account for at least as many loads as
11734 /// there is in the slices in \p LoadedSlices.
11735 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11736                                  LoadedSlice::Cost &GlobalLSCost) {
11737   unsigned NumberOfSlices = LoadedSlices.size();
11738   // If there is less than 2 elements, no pairing is possible.
11739   if (NumberOfSlices < 2)
11740     return;
11741
11742   // Sort the slices so that elements that are likely to be next to each
11743   // other in memory are next to each other in the list.
11744   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
11745             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
11746     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
11747     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
11748   });
11749   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
11750   // First (resp. Second) is the first (resp. Second) potentially candidate
11751   // to be placed in a paired load.
11752   const LoadedSlice *First = nullptr;
11753   const LoadedSlice *Second = nullptr;
11754   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
11755                 // Set the beginning of the pair.
11756                                                            First = Second) {
11757
11758     Second = &LoadedSlices[CurrSlice];
11759
11760     // If First is NULL, it means we start a new pair.
11761     // Get to the next slice.
11762     if (!First)
11763       continue;
11764
11765     EVT LoadedType = First->getLoadedType();
11766
11767     // If the types of the slices are different, we cannot pair them.
11768     if (LoadedType != Second->getLoadedType())
11769       continue;
11770
11771     // Check if the target supplies paired loads for this type.
11772     unsigned RequiredAlignment = 0;
11773     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
11774       // move to the next pair, this type is hopeless.
11775       Second = nullptr;
11776       continue;
11777     }
11778     // Check if we meet the alignment requirement.
11779     if (RequiredAlignment > First->getAlignment())
11780       continue;
11781
11782     // Check that both loads are next to each other in memory.
11783     if (!areSlicesNextToEachOther(*First, *Second))
11784       continue;
11785
11786     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
11787     --GlobalLSCost.Loads;
11788     // Move to the next pair.
11789     Second = nullptr;
11790   }
11791 }
11792
11793 /// \brief Check the profitability of all involved LoadedSlice.
11794 /// Currently, it is considered profitable if there is exactly two
11795 /// involved slices (1) which are (2) next to each other in memory, and
11796 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11797 ///
11798 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11799 /// the elements themselves.
11800 ///
11801 /// FIXME: When the cost model will be mature enough, we can relax
11802 /// constraints (1) and (2).
11803 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11804                                 const APInt &UsedBits, bool ForCodeSize) {
11805   unsigned NumberOfSlices = LoadedSlices.size();
11806   if (StressLoadSlicing)
11807     return NumberOfSlices > 1;
11808
11809   // Check (1).
11810   if (NumberOfSlices != 2)
11811     return false;
11812
11813   // Check (2).
11814   if (!areUsedBitsDense(UsedBits))
11815     return false;
11816
11817   // Check (3).
11818   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11819   // The original code has one big load.
11820   OrigCost.Loads = 1;
11821   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11822     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11823     // Accumulate the cost of all the slices.
11824     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11825     GlobalSlicingCost += SliceCost;
11826
11827     // Account as cost in the original configuration the gain obtained
11828     // with the current slices.
11829     OrigCost.addSliceGain(LS);
11830   }
11831
11832   // If the target supports paired load, adjust the cost accordingly.
11833   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11834   return OrigCost > GlobalSlicingCost;
11835 }
11836
11837 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
11838 /// operations, split it in the various pieces being extracted.
11839 ///
11840 /// This sort of thing is introduced by SROA.
11841 /// This slicing takes care not to insert overlapping loads.
11842 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
11843 bool DAGCombiner::SliceUpLoad(SDNode *N) {
11844   if (Level < AfterLegalizeDAG)
11845     return false;
11846
11847   LoadSDNode *LD = cast<LoadSDNode>(N);
11848   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
11849       !LD->getValueType(0).isInteger())
11850     return false;
11851
11852   // Keep track of already used bits to detect overlapping values.
11853   // In that case, we will just abort the transformation.
11854   APInt UsedBits(LD->getValueSizeInBits(0), 0);
11855
11856   SmallVector<LoadedSlice, 4> LoadedSlices;
11857
11858   // Check if this load is used as several smaller chunks of bits.
11859   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
11860   // of computation for each trunc.
11861   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
11862        UI != UIEnd; ++UI) {
11863     // Skip the uses of the chain.
11864     if (UI.getUse().getResNo() != 0)
11865       continue;
11866
11867     SDNode *User = *UI;
11868     unsigned Shift = 0;
11869
11870     // Check if this is a trunc(lshr).
11871     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
11872         isa<ConstantSDNode>(User->getOperand(1))) {
11873       Shift = User->getConstantOperandVal(1);
11874       User = *User->use_begin();
11875     }
11876
11877     // At this point, User is a Truncate, iff we encountered, trunc or
11878     // trunc(lshr).
11879     if (User->getOpcode() != ISD::TRUNCATE)
11880       return false;
11881
11882     // The width of the type must be a power of 2 and greater than 8-bits.
11883     // Otherwise the load cannot be represented in LLVM IR.
11884     // Moreover, if we shifted with a non-8-bits multiple, the slice
11885     // will be across several bytes. We do not support that.
11886     unsigned Width = User->getValueSizeInBits(0);
11887     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
11888       return 0;
11889
11890     // Build the slice for this chain of computations.
11891     LoadedSlice LS(User, LD, Shift, &DAG);
11892     APInt CurrentUsedBits = LS.getUsedBits();
11893
11894     // Check if this slice overlaps with another.
11895     if ((CurrentUsedBits & UsedBits) != 0)
11896       return false;
11897     // Update the bits used globally.
11898     UsedBits |= CurrentUsedBits;
11899
11900     // Check if the new slice would be legal.
11901     if (!LS.isLegal())
11902       return false;
11903
11904     // Record the slice.
11905     LoadedSlices.push_back(LS);
11906   }
11907
11908   // Abort slicing if it does not seem to be profitable.
11909   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
11910     return false;
11911
11912   ++SlicedLoads;
11913
11914   // Rewrite each chain to use an independent load.
11915   // By construction, each chain can be represented by a unique load.
11916
11917   // Prepare the argument for the new token factor for all the slices.
11918   SmallVector<SDValue, 8> ArgChains;
11919   for (SmallVectorImpl<LoadedSlice>::const_iterator
11920            LSIt = LoadedSlices.begin(),
11921            LSItEnd = LoadedSlices.end();
11922        LSIt != LSItEnd; ++LSIt) {
11923     SDValue SliceInst = LSIt->loadSlice();
11924     CombineTo(LSIt->Inst, SliceInst, true);
11925     if (SliceInst.getOpcode() != ISD::LOAD)
11926       SliceInst = SliceInst.getOperand(0);
11927     assert(SliceInst->getOpcode() == ISD::LOAD &&
11928            "It takes more than a zext to get to the loaded slice!!");
11929     ArgChains.push_back(SliceInst.getValue(1));
11930   }
11931
11932   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
11933                               ArgChains);
11934   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11935   AddToWorklist(Chain.getNode());
11936   return true;
11937 }
11938
11939 /// Check to see if V is (and load (ptr), imm), where the load is having
11940 /// specific bytes cleared out.  If so, return the byte size being masked out
11941 /// and the shift amount.
11942 static std::pair<unsigned, unsigned>
11943 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
11944   std::pair<unsigned, unsigned> Result(0, 0);
11945
11946   // Check for the structure we're looking for.
11947   if (V->getOpcode() != ISD::AND ||
11948       !isa<ConstantSDNode>(V->getOperand(1)) ||
11949       !ISD::isNormalLoad(V->getOperand(0).getNode()))
11950     return Result;
11951
11952   // Check the chain and pointer.
11953   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
11954   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
11955
11956   // The store should be chained directly to the load or be an operand of a
11957   // tokenfactor.
11958   if (LD == Chain.getNode())
11959     ; // ok.
11960   else if (Chain->getOpcode() != ISD::TokenFactor)
11961     return Result; // Fail.
11962   else {
11963     bool isOk = false;
11964     for (const SDValue &ChainOp : Chain->op_values())
11965       if (ChainOp.getNode() == LD) {
11966         isOk = true;
11967         break;
11968       }
11969     if (!isOk) return Result;
11970   }
11971
11972   // This only handles simple types.
11973   if (V.getValueType() != MVT::i16 &&
11974       V.getValueType() != MVT::i32 &&
11975       V.getValueType() != MVT::i64)
11976     return Result;
11977
11978   // Check the constant mask.  Invert it so that the bits being masked out are
11979   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
11980   // follow the sign bit for uniformity.
11981   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
11982   unsigned NotMaskLZ = countLeadingZeros(NotMask);
11983   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
11984   unsigned NotMaskTZ = countTrailingZeros(NotMask);
11985   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
11986   if (NotMaskLZ == 64) return Result;  // All zero mask.
11987
11988   // See if we have a continuous run of bits.  If so, we have 0*1+0*
11989   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
11990     return Result;
11991
11992   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
11993   if (V.getValueType() != MVT::i64 && NotMaskLZ)
11994     NotMaskLZ -= 64-V.getValueSizeInBits();
11995
11996   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
11997   switch (MaskedBytes) {
11998   case 1:
11999   case 2:
12000   case 4: break;
12001   default: return Result; // All one mask, or 5-byte mask.
12002   }
12003
12004   // Verify that the first bit starts at a multiple of mask so that the access
12005   // is aligned the same as the access width.
12006   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12007
12008   Result.first = MaskedBytes;
12009   Result.second = NotMaskTZ/8;
12010   return Result;
12011 }
12012
12013
12014 /// Check to see if IVal is something that provides a value as specified by
12015 /// MaskInfo. If so, replace the specified store with a narrower store of
12016 /// truncated IVal.
12017 static SDNode *
12018 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12019                                 SDValue IVal, StoreSDNode *St,
12020                                 DAGCombiner *DC) {
12021   unsigned NumBytes = MaskInfo.first;
12022   unsigned ByteShift = MaskInfo.second;
12023   SelectionDAG &DAG = DC->getDAG();
12024
12025   // Check to see if IVal is all zeros in the part being masked in by the 'or'
12026   // that uses this.  If not, this is not a replacement.
12027   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12028                                   ByteShift*8, (ByteShift+NumBytes)*8);
12029   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12030
12031   // Check that it is legal on the target to do this.  It is legal if the new
12032   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12033   // legalization.
12034   MVT VT = MVT::getIntegerVT(NumBytes*8);
12035   if (!DC->isTypeLegal(VT))
12036     return nullptr;
12037
12038   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12039   // shifted by ByteShift and truncated down to NumBytes.
12040   if (ByteShift) {
12041     SDLoc DL(IVal);
12042     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12043                        DAG.getConstant(ByteShift*8, DL,
12044                                     DC->getShiftAmountTy(IVal.getValueType())));
12045   }
12046
12047   // Figure out the offset for the store and the alignment of the access.
12048   unsigned StOffset;
12049   unsigned NewAlign = St->getAlignment();
12050
12051   if (DAG.getDataLayout().isLittleEndian())
12052     StOffset = ByteShift;
12053   else
12054     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12055
12056   SDValue Ptr = St->getBasePtr();
12057   if (StOffset) {
12058     SDLoc DL(IVal);
12059     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12060                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12061     NewAlign = MinAlign(NewAlign, StOffset);
12062   }
12063
12064   // Truncate down to the new size.
12065   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12066
12067   ++OpsNarrowed;
12068   return DAG
12069       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12070                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12071       .getNode();
12072 }
12073
12074
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
///
/// \pre \p N is a StoreSDNode.
/// \returns the replacement (narrower) store when the sequence has been
/// narrowed, or an empty SDValue when nothing was changed.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  // Volatile stores are left untouched.
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Truncating stores, vector values and stored values that have other users
  // are not handled.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    // A non-zero byte count means operand 0 matched the masked-load pattern.
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // From here on, only 'or', 'xor' and 'and' with a constant right-hand
  // operand are considered.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The left-hand operand must be a single-use normal load whose chain result
  // is exactly the chain of the store.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // The load and the store must use the same pointer and address space.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For 'and' the bits that change are the zero bits of the immediate, so
    // flip it: after this, a set bit in Imm is a bit the operation may modify.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // Nothing to narrow when no bit or every bit is touched.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // ShAmt is the index of the lowest touched bit, MSB the index of the
    // highest one.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    // Keep widening the candidate to the next power of two until it qualifies
    // or reaches the original width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    // No suitable narrower type was found.
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one (i.e., round ShAmt down to a multiple of
    // NewBW).
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Mask selects the NewBW-wide window starting at ShAmt, clamped to the
    // original width.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed if every touched bit falls inside that window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier flip so that the narrow 'and' gets a proper mask.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Give up if the narrow access would be less aligned than the ABI
      // alignment of the narrow type.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence at the adjusted address.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect users of the old load's chain result to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
12200
12201 /// For a given floating point load / store pair, if the load value isn't used
12202 /// by any other operations, then consider transforming the pair to integer
12203 /// load / store operations if the target deems the transformation profitable.
12204 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12205   StoreSDNode *ST  = cast<StoreSDNode>(N);
12206   SDValue Chain = ST->getChain();
12207   SDValue Value = ST->getValue();
12208   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12209       Value.hasOneUse() &&
12210       Chain == SDValue(Value.getNode(), 1)) {
12211     LoadSDNode *LD = cast<LoadSDNode>(Value);
12212     EVT VT = LD->getMemoryVT();
12213     if (!VT.isFloatingPoint() ||
12214         VT != ST->getMemoryVT() ||
12215         LD->isNonTemporal() ||
12216         ST->isNonTemporal() ||
12217         LD->getPointerInfo().getAddrSpace() != 0 ||
12218         ST->getPointerInfo().getAddrSpace() != 0)
12219       return SDValue();
12220
12221     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12222     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12223         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12224         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12225         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12226       return SDValue();
12227
12228     unsigned LDAlign = LD->getAlignment();
12229     unsigned STAlign = ST->getAlignment();
12230     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12231     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12232     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12233       return SDValue();
12234
12235     SDValue NewLD =
12236         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12237                     LD->getPointerInfo(), LDAlign);
12238
12239     SDValue NewST =
12240         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12241                      ST->getPointerInfo(), STAlign);
12242
12243     AddToWorklist(NewLD.getNode());
12244     AddToWorklist(NewST.getNode());
12245     WorklistRemover DeadNodes(*this);
12246     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12247     ++LdStFP2Int;
12248     return NewST;
12249   }
12250
12251   return SDValue();
12252 }
12253
12254 // This is a helper function for visitMUL to check the profitability
12255 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12256 // MulNode is the original multiply, AddNode is (add x, c1),
12257 // and ConstNode is c2.
12258 //
12259 // If the (add x, c1) has multiple uses, we could increase
12260 // the number of adds if we make this transformation.
12261 // It would only be worth doing this if we can remove a
12262 // multiply in the process. Check for that here.
12263 // To illustrate:
12264 //     (A + c1) * c3
12265 //     (A + c2) * c3
12266 // We're checking for cases where we have common "c3 * A" expressions.
12267 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12268                                               SDValue &AddNode,
12269                                               SDValue &ConstNode) {
12270   APInt Val;
12271
12272   // If the add only has one use, this would be OK to do.
12273   if (AddNode.getNode()->hasOneUse())
12274     return true;
12275
12276   // Walk all the users of the constant with which we're multiplying.
12277   for (SDNode *Use : ConstNode->uses()) {
12278
12279     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12280       continue;
12281
12282     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12283       SDNode *OtherOp;
12284       SDNode *MulVar = AddNode.getOperand(0).getNode();
12285
12286       // OtherOp is what we're multiplying against the constant.
12287       if (Use->getOperand(0) == ConstNode)
12288         OtherOp = Use->getOperand(1).getNode();
12289       else
12290         OtherOp = Use->getOperand(0).getNode();
12291
12292       // Check to see if multiply is with the same operand of our "add".
12293       //
12294       //     ConstNode  = CONST
12295       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12296       //     ...
12297       //     AddNode  = (A + c1)  <-- MulVar is A.
12298       //         = AddNode * ConstNode   <-- current visiting instruction.
12299       //
12300       // If we make this transformation, we will have a common
12301       // multiply (ConstNode * A) that we can save.
12302       if (OtherOp == MulVar)
12303         return true;
12304
12305       // Now check to see if a future expansion will give us a common
12306       // multiply.
12307       //
12308       //     ConstNode  = CONST
12309       //     AddNode    = (A + c1)
12310       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12311       //     ...
12312       //     OtherOp = (A + c2)
12313       //     Use     = OtherOp * ConstNode <-- visiting Use.
12314       //
12315       // If we make this transformation, we will have a common
12316       // multiply (CONST * A) after we also do the same transformation
12317       // to the "t2" instruction.
12318       if (OtherOp->getOpcode() == ISD::ADD &&
12319           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12320           OtherOp->getOperand(0).getNode() == MulVar)
12321         return true;
12322     }
12323   }
12324
12325   // Didn't find a case where this would be profitable.
12326   return false;
12327 }
12328
12329 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12330                                          unsigned NumStores) {
12331   SmallVector<SDValue, 8> Chains;
12332   SmallPtrSet<const SDNode *, 8> Visited;
12333   SDLoc StoreDL(StoreNodes[0].MemNode);
12334
12335   for (unsigned i = 0; i < NumStores; ++i) {
12336     Visited.insert(StoreNodes[i].MemNode);
12337   }
12338
12339   // don't include nodes that are children
12340   for (unsigned i = 0; i < NumStores; ++i) {
12341     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12342       Chains.push_back(StoreNodes[i].MemNode->getChain());
12343   }
12344
12345   assert(Chains.size() > 0 && "Chain should have generated a chain");
12346   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12347 }
12348
/// Merge the first \p NumStores stores of \p StoreNodes into a single store
/// and replace each of them with it.
///
/// \param StoreNodes the candidate stores; entry 0 supplies the base pointer,
///        pointer info, alignment and debug location of the merged store.
/// \param MemVT the memory type of each individual store.
/// \param NumStores how many entries of \p StoreNodes to merge.
/// \param IsConstantSrc true when the stored values are constants.
/// \param UseVector true to emit a vector store, false to emit one wide
///        integer constant store.
/// \param UseTrunc true to emit the integer store as a truncating store of
///        the value widened to the type the target transforms it to.
/// \returns true if the stores were merged, false if fewer than two stores
/// were given or if a stored value does not have type \p MemVT on the
/// non-constant vector path.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      // Gather the constants into a BUILD_VECTOR, one element per store.
      // NOTE(review): the loop bound is Ty's element count while StoreNodes
      // is what gets indexed — presumably MemVT is scalar on this path so
      // both counts agree; confirm against callers.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // An FP constant stored with an integer memory type is converted to
        // an integer constant holding its bit pattern.
        // NOTE(review): the cast to uint32_t keeps only the low 32 bits of
        // that bit pattern — confirm wider FP constants cannot reach here.
        if (MemVT.getScalarType().isInteger())
          if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
            Val = DAG.getConstant(
                (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
                SDLoc(CFP), MemVT);
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // Each value is shifted in from the low end, so the store visited
      // first ends up in the most significant bits: on little endian targets
      // that must be the last store, on big endian targets the first one.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (UseVector || !UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Widen the constant to the type the target transforms it to, then store
    // it truncated back to the merged width.
    EVT LegalizedStoredValueTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValueTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
12462
12463 void DAGCombiner::getStoreMergeCandidates(
12464     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12465   // This holds the base pointer, index, and the offset in bytes from the base
12466   // pointer.
12467   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12468   EVT MemVT = St->getMemoryVT();
12469
12470   // We must have a base and an offset.
12471   if (!BasePtr.getBase().getNode())
12472     return;
12473
12474   // Do not handle stores to undef base pointers.
12475   if (BasePtr.getBase().isUndef())
12476     return;
12477
12478   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
12479                        isa<ConstantFPSDNode>(St->getValue());
12480   bool IsExtractVecSrc =
12481       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12482        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12483   bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
12484   BaseIndexOffset LBasePtr;
12485   // Match on loadbaseptr if relevant.
12486   if (IsLoadSrc)
12487     LBasePtr = BaseIndexOffset::match(
12488         cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
12489
12490   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
12491                             int64_t &Offset) -> bool {
12492     if (Other->isVolatile() || Other->isIndexed())
12493       return false;
12494     // We can merge constant floats to equivalent integers
12495     if (Other->getMemoryVT() != MemVT)
12496       if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
12497             isa<ConstantFPSDNode>(Other->getValue())))
12498         return false;
12499     if (IsLoadSrc) {
12500       // The Load's Base Ptr must also match
12501       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
12502         auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
12503         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
12504           return false;
12505       } else
12506         return false;
12507     }
12508     if (IsConstantSrc)
12509       if (!(isa<ConstantSDNode>(Other->getValue()) ||
12510             isa<ConstantFPSDNode>(Other->getValue())))
12511         return false;
12512     if (IsExtractVecSrc)
12513       if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12514             Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
12515         return false;
12516     Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
12517     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
12518   };
12519   // We looking for a root node which is an ancestor to all mergable
12520   // stores. We search up through a load, to our root and then down
12521   // through all children. For instance we will find Store{1,2,3} if
12522   // St is Store1, Store2. or Store3 where the root is not a load
12523   // which always true for nonvolatile ops. TODO: Expand
12524   // the search to find all valid candidates through multiple layers of loads.
12525   //
12526   // Root
12527   // |-------|-------|
12528   // Load    Load    Store3
12529   // |       |
12530   // Store1   Store2
12531   //
12532   // FIXME: We should be able to climb and
12533   // descend TokenFactors to find candidates as well.
12534
12535   SDNode *RootNode = (St->getChain()).getNode();
12536
12537   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12538     RootNode = Ldn->getChain().getNode();
12539     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12540       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12541         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
12542           if (I2.getOperandNo() == 0)
12543             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
12544               BaseIndexOffset Ptr;
12545               int64_t PtrDiff;
12546               if (CandidateMatch(OtherST, Ptr, PtrDiff))
12547                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12548             }
12549   } else
12550     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12551       if (I.getOperandNo() == 0)
12552         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12553           BaseIndexOffset Ptr;
12554           int64_t PtrDiff;
12555           if (CandidateMatch(OtherST, Ptr, PtrDiff))
12556             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12557         }
12558 }
12559
12560 // We need to check that merging these stores does not cause a loop
12561 // in the DAG. Any store candidate may depend on another candidate
12562 // indirectly through its operand (we already consider dependencies
12563 // through the chain). Check in parallel by searching up from
12564 // non-chain operands of candidates.
12565 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12566     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12567   SmallPtrSet<const SDNode *, 16> Visited;
12568   SmallVector<const SDNode *, 8> Worklist;
12569   // search ops of store candidates
12570   for (unsigned i = 0; i < NumStores; ++i) {
12571     SDNode *n = StoreNodes[i].MemNode;
12572     // Potential loops may happen only through non-chain operands
12573     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12574       Worklist.push_back(n->getOperand(j).getNode());
12575   }
12576   // search through DAG. We can stop early if we find a storenode
12577   for (unsigned i = 0; i < NumStores; ++i) {
12578     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12579       return false;
12580   }
12581   return true;
12582 }
12583
12584 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12585   if (OptLevel == CodeGenOpt::None)
12586     return false;
12587
12588   EVT MemVT = St->getMemoryVT();
12589   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12590
12591   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12592     return false;
12593
12594   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12595       Attribute::NoImplicitFloat);
12596
12597   // This function cannot currently deal with non-byte-sized memory sizes.
12598   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12599     return false;
12600
12601   if (!MemVT.isSimple())
12602     return false;
12603
12604   // Perform an early exit check. Do not bother looking at stored values that
12605   // are not constants, loads, or extracted vector elements.
12606   SDValue StoredVal = St->getValue();
12607   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12608   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12609                        isa<ConstantFPSDNode>(StoredVal);
12610   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12611                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12612
12613   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12614     return false;
12615
12616   // Don't merge vectors into wider vectors if the source data comes from loads.
12617   // TODO: This restriction can be lifted by using logic similar to the
12618   // ExtractVecSrc case.
12619   if (MemVT.isVector() && IsLoadSrc)
12620     return false;
12621
12622   SmallVector<MemOpLink, 8> StoreNodes;
12623   // Find potential store merge candidates by searching through chain sub-DAG
12624   getStoreMergeCandidates(St, StoreNodes);
12625
12626   // Check if there is anything to merge.
12627   if (StoreNodes.size() < 2)
12628     return false;
12629
12630   // Sort the memory operands according to their distance from the
12631   // base pointer.
12632   std::sort(StoreNodes.begin(), StoreNodes.end(),
12633             [](MemOpLink LHS, MemOpLink RHS) {
12634               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12635             });
12636
12637   // Store Merge attempts to merge the lowest stores. This generally
12638   // works out as if successful, as the remaining stores are checked
12639   // after the first collection of stores is merged. However, in the
12640   // case that a non-mergeable store is found first, e.g., {p[-2],
12641   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
12642   // mergeable cases. To prevent this, we prune such stores from the
12643   // front of StoreNodes here.
12644
12645   bool RV = false;
12646   while (StoreNodes.size() > 1) {
12647     unsigned StartIdx = 0;
12648     while ((StartIdx + 1 < StoreNodes.size()) &&
12649            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
12650                StoreNodes[StartIdx + 1].OffsetFromBase)
12651       ++StartIdx;
12652
12653     // Bail if we don't have enough candidates to merge.
12654     if (StartIdx + 1 >= StoreNodes.size())
12655       return RV;
12656
12657     if (StartIdx)
12658       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
12659
12660     // Scan the memory operations on the chain and find the first
12661     // non-consecutive store memory address.
12662     unsigned NumConsecutiveStores = 1;
12663     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12664     // Check that the addresses are consecutive starting from the second
12665     // element in the list of stores.
12666     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12667       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12668       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12669         break;
12670       NumConsecutiveStores = i + 1;
12671     }
12672
12673     if (NumConsecutiveStores < 2) {
12674       StoreNodes.erase(StoreNodes.begin(),
12675                        StoreNodes.begin() + NumConsecutiveStores);
12676       continue;
12677     }
12678
12679     // Check that we can merge these candidates without causing a cycle
12680     if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
12681                                                   NumConsecutiveStores)) {
12682       StoreNodes.erase(StoreNodes.begin(),
12683                        StoreNodes.begin() + NumConsecutiveStores);
12684       continue;
12685     }
12686
12687     // The node with the lowest store address.
12688     LLVMContext &Context = *DAG.getContext();
12689     const DataLayout &DL = DAG.getDataLayout();
12690
12691     // Store the constants into memory as one consecutive store.
12692     if (IsConstantSrc) {
12693       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12694       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12695       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12696       unsigned LastLegalType = 1;
12697       unsigned LastLegalVectorType = 1;
12698       bool LastIntegerTrunc = false;
12699       bool NonZero = false;
12700       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12701         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12702         SDValue StoredVal = ST->getValue();
12703
12704         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
12705           NonZero |= !C->isNullValue();
12706         } else if (ConstantFPSDNode *C =
12707                        dyn_cast<ConstantFPSDNode>(StoredVal)) {
12708           NonZero |= !C->getConstantFPValue()->isNullValue();
12709         } else {
12710           // Non-constant.
12711           break;
12712         }
12713
12714         // Find a legal type for the constant store.
12715         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12716         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12717         bool IsFast = false;
12718         if (TLI.isTypeLegal(StoreTy) &&
12719             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12720             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12721                                    FirstStoreAlign, &IsFast) &&
12722             IsFast) {
12723           LastIntegerTrunc = false;
12724           LastLegalType = i + 1;
12725           // Or check whether a truncstore is legal.
12726         } else if (TLI.getTypeAction(Context, StoreTy) ==
12727                    TargetLowering::TypePromoteInteger) {
12728           EVT LegalizedStoredValueTy =
12729               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12730           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12731               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12732               TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12733                                      FirstStoreAS, FirstStoreAlign, &IsFast) &&
12734               IsFast) {
12735             LastIntegerTrunc = true;
12736             LastLegalType = i + 1;
12737           }
12738         }
12739
12740         // We only use vectors if the constant is known to be zero or the target
12741         // allows it and the function is not marked with the noimplicitfloat
12742         // attribute.
12743         if ((!NonZero ||
12744              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12745             !NoVectors) {
12746           // Find a legal type for the vector store.
12747           EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
12748           if (TLI.isTypeLegal(Ty) &&
12749               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
12750               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12751                                      FirstStoreAlign, &IsFast) &&
12752               IsFast)
12753             LastLegalVectorType = i + 1;
12754         }
12755       }
12756
12757       // Check if we found a legal integer type that creates a meaningful merge.
12758       if (LastLegalType < 2 && LastLegalVectorType < 2) {
12759         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12760         continue;
12761       }
12762
12763       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12764       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12765
12766       bool Merged = MergeStoresOfConstantsOrVecElts(
12767           StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
12768       if (!Merged) {
12769         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12770         continue;
12771       }
12772       // Remove merged stores for next iteration.
12773       RV = true;
12774       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12775       continue;
12776     }
12777
12778     // When extracting multiple vector elements, try to store them
12779     // in one vector store rather than a sequence of scalar stores.
12780     if (IsExtractVecSrc) {
12781       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12782       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12783       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12784       unsigned NumStoresToMerge = 1;
12785       bool IsVec = MemVT.isVector();
12786       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12787         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12788         unsigned StoreValOpcode = St->getValue().getOpcode();
12789         // This restriction could be loosened.
12790         // Bail out if any stored values are not elements extracted from a
12791         // vector. It should be possible to handle mixed sources, but load
12792         // sources need more careful handling (see the block of code below that
12793         // handles consecutive loads).
12794         if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
12795             StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
12796           return RV;
12797
12798         // Find a legal type for the vector store.
12799         unsigned Elts = i + 1;
12800         if (IsVec) {
12801           // When merging vector stores, get the total number of elements.
12802           Elts *= MemVT.getVectorNumElements();
12803         }
12804         EVT Ty =
12805             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12806         bool IsFast;
12807         if (TLI.isTypeLegal(Ty) &&
12808             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
12809             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12810                                    FirstStoreAlign, &IsFast) &&
12811             IsFast)
12812           NumStoresToMerge = i + 1;
12813       }
12814
12815       bool Merged = MergeStoresOfConstantsOrVecElts(
12816           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
12817       if (!Merged) {
12818         StoreNodes.erase(StoreNodes.begin(),
12819                          StoreNodes.begin() + NumStoresToMerge);
12820         continue;
12821       }
12822       // Remove merged stores for next iteration.
12823       StoreNodes.erase(StoreNodes.begin(),
12824                        StoreNodes.begin() + NumStoresToMerge);
12825       RV = true;
12826       continue;
12827     }
12828
12829     // Below we handle the case of multiple consecutive stores that
12830     // come from multiple consecutive loads. We merge them into a single
12831     // wide load and a single wide store.
12832
12833     // Look for load nodes which are used by the stored values.
12834     SmallVector<MemOpLink, 8> LoadNodes;
12835
12836     // Find acceptable loads. Loads need to have the same chain (token factor),
12837     // must not be zext, volatile, indexed, and they must be consecutive.
12838     BaseIndexOffset LdBasePtr;
12839     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12840       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12841       LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
12842       if (!Ld)
12843         break;
12844
12845       // Loads must only have one use.
12846       if (!Ld->hasNUsesOfValue(1, 0))
12847         break;
12848
12849       // The memory operands must not be volatile.
12850       if (Ld->isVolatile() || Ld->isIndexed())
12851         break;
12852
12853       // We do not accept ext loads.
12854       if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
12855         break;
12856
12857       // The stored memory type must be the same.
12858       if (Ld->getMemoryVT() != MemVT)
12859         break;
12860
12861       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12862       // If this is not the first ptr that we check.
12863       int64_t LdOffset = 0;
12864       if (LdBasePtr.getBase().getNode()) {
12865         // The base ptr must be the same.
12866         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
12867           break;
12868       } else {
12869         // Check that all other base pointers are the same as this one.
12870         LdBasePtr = LdPtr;
12871       }
12872
12873       // We found a potential memory operand to merge.
12874       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
12875     }
12876
12877     if (LoadNodes.size() < 2) {
12878       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12879       continue;
12880     }
12881
12882     // If we have load/store pair instructions and we only have two values,
12883     // don't bother merging.
12884     unsigned RequiredAlignment;
12885     if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
12886         StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
12887       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
12888       continue;
12889     }
12890     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12891     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12892     unsigned FirstStoreAlign = FirstInChain->getAlignment();
12893     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
12894     unsigned FirstLoadAS = FirstLoad->getAddressSpace();
12895     unsigned FirstLoadAlign = FirstLoad->getAlignment();
12896
12897     // Scan the memory operations on the chain and find the first
12898     // non-consecutive load memory address. These variables hold the index in
12899     // the store node array.
12900     unsigned LastConsecutiveLoad = 1;
12901     // This variable refers to the size and not index in the array.
12902     unsigned LastLegalVectorType = 1;
12903     unsigned LastLegalIntegerType = 1;
12904     bool isDereferenceable = true;
12905     bool DoIntegerTruncate = false;
12906     StartAddress = LoadNodes[0].OffsetFromBase;
12907     SDValue FirstChain = FirstLoad->getChain();
12908     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
12909       // All loads must share the same chain.
12910       if (LoadNodes[i].MemNode->getChain() != FirstChain)
12911         break;
12912
12913       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
12914       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12915         break;
12916       LastConsecutiveLoad = i;
12917
12918       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
12919         isDereferenceable = false;
12920
12921       // Find a legal type for the vector store.
12922       EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
12923       bool IsFastSt, IsFastLd;
12924       if (TLI.isTypeLegal(StoreTy) &&
12925           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12926           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12927                                  FirstStoreAlign, &IsFastSt) &&
12928           IsFastSt &&
12929           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12930                                  FirstLoadAlign, &IsFastLd) &&
12931           IsFastLd) {
12932         LastLegalVectorType = i + 1;
12933       }
12934
12935       // Find a legal type for the integer store.
12936       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12937       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12938       if (TLI.isTypeLegal(StoreTy) &&
12939           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12940           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12941                                  FirstStoreAlign, &IsFastSt) &&
12942           IsFastSt &&
12943           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12944                                  FirstLoadAlign, &IsFastLd) &&
12945           IsFastLd) {
12946         LastLegalIntegerType = i + 1;
12947         DoIntegerTruncate = false;
12948         // Or check whether a truncstore and extload is legal.
12949       } else if (TLI.getTypeAction(Context, StoreTy) ==
12950                  TargetLowering::TypePromoteInteger) {
12951         EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
12952         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12953             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12954             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
12955                                StoreTy) &&
12956             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
12957                                StoreTy) &&
12958             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12959             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12960                                    FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
12961             IsFastSt &&
12962             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12963                                    FirstLoadAlign, &IsFastLd) &&
12964             IsFastLd) {
12965           LastLegalIntegerType = i + 1;
12966           DoIntegerTruncate = true;
12967         }
12968       }
12969     }
12970
12971     // Only use vector types if the vector type is larger than the integer type.
12972     // If they are the same, use integers.
12973     bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
12974     unsigned LastLegalType =
12975         std::max(LastLegalVectorType, LastLegalIntegerType);
12976
12977     // We add +1 here because the LastXXX variables refer to location while
12978     // the NumElem refers to array/index size.
12979     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
12980     NumElem = std::min(LastLegalType, NumElem);
12981
12982     if (NumElem < 2) {
12983       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12984       continue;
12985     }
12986
12987     // Find if it is better to use vectors or integers to load and store
12988     // to memory.
12989     EVT JointMemOpVT;
12990     if (UseVectorTy) {
12991       JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
12992     } else {
12993       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
12994       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
12995     }
12996
12997     SDLoc LoadDL(LoadNodes[0].MemNode);
12998     SDLoc StoreDL(StoreNodes[0].MemNode);
12999
13000     // The merged loads are required to have the same incoming chain, so
13001     // using the first's chain is acceptable.
13002
13003     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13004     AddToWorklist(NewStoreChain.getNode());
13005
13006     MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13007                                           MachineMemOperand::MODereferenceable:
13008                                           MachineMemOperand::MONone;
13009
13010     SDValue NewLoad, NewStore;
13011     if (UseVectorTy || !DoIntegerTruncate) {
13012       NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13013                             FirstLoad->getBasePtr(),
13014                             FirstLoad->getPointerInfo(), FirstLoadAlign,
13015                             MMOFlags);
13016       NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13017                               FirstInChain->getBasePtr(),
13018                               FirstInChain->getPointerInfo(), FirstStoreAlign);
13019     } else { // This must be the truncstore/extload case
13020       EVT ExtendedTy =
13021           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13022       NewLoad =
13023           DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13024                          FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13025                          JointMemOpVT, FirstLoadAlign, MMOFlags);
13026       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13027                                    FirstInChain->getBasePtr(),
13028                                    FirstInChain->getPointerInfo(), JointMemOpVT,
13029                                    FirstInChain->getAlignment(),
13030                                    FirstInChain->getMemOperand()->getFlags());
13031     }
13032
13033     // Transfer chain users from old loads to the new load.
13034     for (unsigned i = 0; i < NumElem; ++i) {
13035       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13036       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13037                                     SDValue(NewLoad.getNode(), 1));
13038     }
13039
13040     // Replace the all stores with the new store.
13041     for (unsigned i = 0; i < NumElem; ++i)
13042       CombineTo(StoreNodes[i].MemNode, NewStore);
13043     RV = true;
13044     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13045     continue;
13046   }
13047   return RV;
13048 }
13049
13050 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13051   SDLoc SL(ST);
13052   SDValue ReplStore;
13053
13054   // Replace the chain to avoid dependency.
13055   if (ST->isTruncatingStore()) {
13056     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13057                                   ST->getBasePtr(), ST->getMemoryVT(),
13058                                   ST->getMemOperand());
13059   } else {
13060     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13061                              ST->getMemOperand());
13062   }
13063
13064   // Create token to keep both nodes around.
13065   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13066                               MVT::Other, ST->getChain(), ReplStore);
13067
13068   // Make sure the new and old chains are cleaned up.
13069   AddToWorklist(Token.getNode());
13070
13071   // Don't add users to work list.
13072   return CombineTo(ST, Token, false);
13073 }
13074
13075 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13076   SDValue Value = ST->getValue();
13077   if (Value.getOpcode() == ISD::TargetConstantFP)
13078     return SDValue();
13079
13080   SDLoc DL(ST);
13081
13082   SDValue Chain = ST->getChain();
13083   SDValue Ptr = ST->getBasePtr();
13084
13085   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13086
13087   // NOTE: If the original store is volatile, this transform must not increase
13088   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13089   // processor operation but an i64 (which is not legal) requires two.  So the
13090   // transform should not be done in this case.
13091
13092   SDValue Tmp;
13093   switch (CFP->getSimpleValueType(0).SimpleTy) {
13094   default:
13095     llvm_unreachable("Unknown FP type");
13096   case MVT::f16:    // We don't do this for these yet.
13097   case MVT::f80:
13098   case MVT::f128:
13099   case MVT::ppcf128:
13100     return SDValue();
13101   case MVT::f32:
13102     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13103         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13104       ;
13105       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13106                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13107                             MVT::i32);
13108       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13109     }
13110
13111     return SDValue();
13112   case MVT::f64:
13113     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13114          !ST->isVolatile()) ||
13115         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13116       ;
13117       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13118                             getZExtValue(), SDLoc(CFP), MVT::i64);
13119       return DAG.getStore(Chain, DL, Tmp,
13120                           Ptr, ST->getMemOperand());
13121     }
13122
13123     if (!ST->isVolatile() &&
13124         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13125       // Many FP stores are not made apparent until after legalize, e.g. for
13126       // argument passing.  Since this is so common, custom legalize the
13127       // 64-bit integer store into two 32-bit stores.
13128       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13129       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13130       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13131       if (DAG.getDataLayout().isBigEndian())
13132         std::swap(Lo, Hi);
13133
13134       unsigned Alignment = ST->getAlignment();
13135       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13136       AAMDNodes AAInfo = ST->getAAInfo();
13137
13138       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13139                                  ST->getAlignment(), MMOFlags, AAInfo);
13140       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13141                         DAG.getConstant(4, DL, Ptr.getValueType()));
13142       Alignment = MinAlign(Alignment, 4U);
13143       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13144                                  ST->getPointerInfo().getWithOffset(4),
13145                                  Alignment, MMOFlags, AAInfo);
13146       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13147                          St0, St1);
13148     }
13149
13150     return SDValue();
13151   }
13152 }
13153
13154 SDValue DAGCombiner::visitSTORE(SDNode *N) {
13155   StoreSDNode *ST  = cast<StoreSDNode>(N);
13156   SDValue Chain = ST->getChain();
13157   SDValue Value = ST->getValue();
13158   SDValue Ptr   = ST->getBasePtr();
13159
13160   // If this is a store of a bit convert, store the input value if the
13161   // resultant store does not need a higher alignment than the original.
13162   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
13163       ST->isUnindexed()) {
13164     EVT SVT = Value.getOperand(0).getValueType();
13165     if (((!LegalOperations && !ST->isVolatile()) ||
13166          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
13167         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
13168       unsigned OrigAlign = ST->getAlignment();
13169       bool Fast = false;
13170       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
13171                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
13172           Fast) {
13173         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13174                             ST->getPointerInfo(), OrigAlign,
13175                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
13176       }
13177     }
13178   }
13179
13180   // Turn 'store undef, Ptr' -> nothing.
13181   if (Value.isUndef() && ST->isUnindexed())
13182     return Chain;
13183
13184   // Try to infer better alignment information than the store already has.
13185   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
13186     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13187       if (Align > ST->getAlignment()) {
13188         SDValue NewStore =
13189             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
13190                               ST->getMemoryVT(), Align,
13191                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
13192         if (NewStore.getNode() != N)
13193           return CombineTo(ST, NewStore, true);
13194       }
13195     }
13196   }
13197
13198   // Try transforming a pair floating point load / store ops to integer
13199   // load / store ops.
13200   if (SDValue NewST = TransformFPLoadStorePair(N))
13201     return NewST;
13202
13203   if (ST->isUnindexed()) {
13204     // Walk up chain skipping non-aliasing memory nodes, on this store and any
13205     // adjacent stores.
13206     if (findBetterNeighborChains(ST)) {
13207       // replaceStoreChain uses CombineTo, which handled all of the worklist
13208       // manipulation. Return the original node to not do anything else.
13209       return SDValue(ST, 0);
13210     }
13211     Chain = ST->getChain();
13212   }
13213
13214   // FIXME: is there such a thing as a truncating indexed store?
13215   if (ST->isTruncatingStore() && ST->isUnindexed() &&
13216       Value.getValueType().isInteger()) {
13217     // See if we can simplify the input to this truncstore with knowledge that
13218     // only the low bits are being used.  For example:
13219     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
13220     SDValue Shorter = GetDemandedBits(
13221         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13222                                     ST->getMemoryVT().getScalarSizeInBits()));
13223     AddToWorklist(Value.getNode());
13224     if (Shorter.getNode())
13225       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
13226                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
13227
13228     // Otherwise, see if we can simplify the operation with
13229     // SimplifyDemandedBits, which only works if the value has a single use.
13230     if (SimplifyDemandedBits(
13231             Value,
13232             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13233                                  ST->getMemoryVT().getScalarSizeInBits()))) {
13234       // Re-visit the store if anything changed and the store hasn't been merged
13235       // with another node (N is deleted) SimplifyDemandedBits will add Value's
13236       // node back to the worklist if necessary, but we also need to re-visit
13237       // the Store node itself.
13238       if (N->getOpcode() != ISD::DELETED_NODE)
13239         AddToWorklist(N);
13240       return SDValue(N, 0);
13241     }
13242   }
13243
13244   // If this is a load followed by a store to the same location, then the store
13245   // is dead/noop.
13246   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
13247     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
13248         ST->isUnindexed() && !ST->isVolatile() &&
13249         // There can't be any side effects between the load and store, such as
13250         // a call or store.
13251         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
13252       // The store is dead, remove it.
13253       return Chain;
13254     }
13255   }
13256
13257   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
13258     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
13259         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
13260         ST->getMemoryVT() == ST1->getMemoryVT()) {
13261       // If this is a store followed by a store with the same value to the same
13262       // location, then the store is dead/noop.
13263       if (ST1->getValue() == Value) {
13264         // The store is dead, remove it.
13265         return Chain;
13266       }
13267
13268       // If this is a store who's preceeding store to the same location
13269       // and no one other node is chained to that store we can effectively
13270       // drop the store. Do not remove stores to undef as they may be used as
13271       // data sinks.
13272       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
13273           !ST1->getBasePtr().isUndef()) {
13274         // ST1 is fully overwritten and can be elided. Combine with it's chain
13275         // value.
13276         CombineTo(ST1, ST1->getChain());
13277         return SDValue();
13278       }
13279     }
13280   }
13281
13282   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
13283   // truncating store.  We can do this even if this is already a truncstore.
13284   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
13285       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
13286       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
13287                             ST->getMemoryVT())) {
13288     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
13289                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
13290   }
13291
13292   // Only perform this optimization before the types are legal, because we
13293   // don't want to perform this optimization on every DAGCombine invocation.
13294   if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
13295                                            : !LegalTypes) {
13296     for (;;) {
13297       // There can be multiple store sequences on the same chain.
13298       // Keep trying to merge store sequences until we are unable to do so
13299       // or until we merge the last store on the chain.
13300       bool Changed = MergeConsecutiveStores(ST);
13301       if (!Changed) break;
13302       // Return N as merge only uses CombineTo and no worklist clean
13303       // up is necessary.
13304       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
13305         return SDValue(N, 0);
13306     }
13307   }
13308
13309   // Try transforming N to an indexed store.
13310   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13311     return SDValue(N, 0);
13312
13313   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
13314   //
13315   // Make sure to do this only after attempting to merge stores in order to
13316   //  avoid changing the types of some subset of stores due to visit order,
13317   //  preventing their merging.
13318   if (isa<ConstantFPSDNode>(ST->getValue())) {
13319     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
13320       return NewSt;
13321   }
13322
13323   if (SDValue NewSt = splitMergedValStore(ST))
13324     return NewSt;
13325
13326   return ReduceLoadOpStoreWidth(N);
13327 }
13328
13329 /// For the instruction sequence of store below, F and I values
13330 /// are bundled together as an i64 value before being stored into memory.
13331 /// Sometimes it is more efficent to generate separate stores for F and I,
13332 /// which can remove the bitwise instructions or sink them to colder places.
13333 ///
13334 ///   (store (or (zext (bitcast F to i32) to i64),
13335 ///              (shl (zext I to i64), 32)), addr)  -->
13336 ///   (store F, addr) and (store I, addr+4)
13337 ///
13338 /// Similarly, splitting for other merged store can also be beneficial, like:
13339 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13340 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13341 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13342 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13343 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13344 ///
13345 /// We allow each target to determine specifically which kind of splitting is
13346 /// supported.
13347 ///
13348 /// The store patterns are commonly seen from the simple code snippet below
13349 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
13350 ///   void goo(const std::pair<int, float> &);
13351 ///   hoo() {
13352 ///     ...
13353 ///     goo(std::make_pair(tmp, ftmp));
13354 ///     ...
13355 ///   }
13356 ///
13357 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13358   if (OptLevel == CodeGenOpt::None)
13359     return SDValue();
13360
13361   SDValue Val = ST->getValue();
13362   SDLoc DL(ST);
13363
13364   // Match OR operand.
13365   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13366     return SDValue();
13367
13368   // Match SHL operand and get Lower and Higher parts of Val.
13369   SDValue Op1 = Val.getOperand(0);
13370   SDValue Op2 = Val.getOperand(1);
13371   SDValue Lo, Hi;
13372   if (Op1.getOpcode() != ISD::SHL) {
13373     std::swap(Op1, Op2);
13374     if (Op1.getOpcode() != ISD::SHL)
13375       return SDValue();
13376   }
13377   Lo = Op2;
13378   Hi = Op1.getOperand(0);
13379   if (!Op1.hasOneUse())
13380     return SDValue();
13381
13382   // Match shift amount to HalfValBitSize.
13383   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13384   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13385   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13386     return SDValue();
13387
13388   // Lo and Hi are zero-extended from int with size less equal than 32
13389   // to i64.
13390   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13391       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13392       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13393       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13394       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13395       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13396     return SDValue();
13397
13398   // Use the EVT of low and high parts before bitcast as the input
13399   // of target query.
13400   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13401                   ? Lo.getOperand(0).getValueType()
13402                   : Lo.getValueType();
13403   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13404                    ? Hi.getOperand(0).getValueType()
13405                    : Hi.getValueType();
13406   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13407     return SDValue();
13408
13409   // Start to split store.
13410   unsigned Alignment = ST->getAlignment();
13411   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13412   AAMDNodes AAInfo = ST->getAAInfo();
13413
13414   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13415   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13416   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13417   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13418
13419   SDValue Chain = ST->getChain();
13420   SDValue Ptr = ST->getBasePtr();
13421   // Lower value store.
13422   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13423                              ST->getAlignment(), MMOFlags, AAInfo);
13424   Ptr =
13425       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13426                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13427   // Higher value store.
13428   SDValue St1 =
13429       DAG.getStore(St0, DL, Hi, Ptr,
13430                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13431                    Alignment / 2, MMOFlags, AAInfo);
13432   return St1;
13433 }
13434
13435 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13436   SDValue InVec = N->getOperand(0);
13437   SDValue InVal = N->getOperand(1);
13438   SDValue EltNo = N->getOperand(2);
13439   SDLoc DL(N);
13440
13441   // If the inserted element is an UNDEF, just use the input vector.
13442   if (InVal.isUndef())
13443     return InVec;
13444
13445   EVT VT = InVec.getValueType();
13446
13447   // Check that we know which element is being inserted
13448   if (!isa<ConstantSDNode>(EltNo))
13449     return SDValue();
13450   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13451
13452   // Canonicalize insert_vector_elt dag nodes.
13453   // Example:
13454   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13455   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13456   //
13457   // Do this only if the child insert_vector node has one use; also
13458   // do this only if indices are both constants and Idx1 < Idx0.
13459   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13460       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13461     unsigned OtherElt = InVec.getConstantOperandVal(2);
13462     if (Elt < OtherElt) {
13463       // Swap nodes.
13464       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13465                                   InVec.getOperand(0), InVal, EltNo);
13466       AddToWorklist(NewOp.getNode());
13467       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13468                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13469     }
13470   }
13471
13472   // If we can't generate a legal BUILD_VECTOR, exit
13473   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13474     return SDValue();
13475
13476   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13477   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13478   // vector elements.
13479   SmallVector<SDValue, 8> Ops;
13480   // Do not combine these two vectors if the output vector will not replace
13481   // the input vector.
13482   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13483     Ops.append(InVec.getNode()->op_begin(),
13484                InVec.getNode()->op_end());
13485   } else if (InVec.isUndef()) {
13486     unsigned NElts = VT.getVectorNumElements();
13487     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13488   } else {
13489     return SDValue();
13490   }
13491
13492   // Insert the element
13493   if (Elt < Ops.size()) {
13494     // All the operands of BUILD_VECTOR must have the same type;
13495     // we enforce that here.
13496     EVT OpVT = Ops[0].getValueType();
13497     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13498   }
13499
13500   // Return the new vector
13501   return DAG.getBuildVector(VT, DL, Ops);
13502 }
13503
13504 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
13505     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
13506   assert(!OriginalLoad->isVolatile());
13507
13508   EVT ResultVT = EVE->getValueType(0);
13509   EVT VecEltVT = InVecVT.getVectorElementType();
13510   unsigned Align = OriginalLoad->getAlignment();
13511   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
13512       VecEltVT.getTypeForEVT(*DAG.getContext()));
13513
13514   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13515     return SDValue();
13516
13517   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
13518     ISD::NON_EXTLOAD : ISD::EXTLOAD;
13519   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
13520     return SDValue();
13521
13522   Align = NewAlign;
13523
13524   SDValue NewPtr = OriginalLoad->getBasePtr();
13525   SDValue Offset;
13526   EVT PtrType = NewPtr.getValueType();
13527   MachinePointerInfo MPI;
13528   SDLoc DL(EVE);
13529   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13530     int Elt = ConstEltNo->getZExtValue();
13531     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
13532     Offset = DAG.getConstant(PtrOff, DL, PtrType);
13533     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
13534   } else {
13535     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
13536     Offset = DAG.getNode(
13537         ISD::MUL, DL, PtrType, Offset,
13538         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
13539     MPI = OriginalLoad->getPointerInfo();
13540   }
13541   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
13542
13543   // The replacement we need to do here is a little tricky: we need to
13544   // replace an extractelement of a load with a load.
13545   // Use ReplaceAllUsesOfValuesWith to do the replacement.
13546   // Note that this replacement assumes that the extractvalue is the only
13547   // use of the load; that's okay because we don't want to perform this
13548   // transformation in other cases anyway.
13549   SDValue Load;
13550   SDValue Chain;
13551   if (ResultVT.bitsGT(VecEltVT)) {
13552     // If the result type of vextract is wider than the load, then issue an
13553     // extending load instead.
13554     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
13555                                                   VecEltVT)
13556                                    ? ISD::ZEXTLOAD
13557                                    : ISD::EXTLOAD;
13558     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
13559                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
13560                           Align, OriginalLoad->getMemOperand()->getFlags(),
13561                           OriginalLoad->getAAInfo());
13562     Chain = Load.getValue(1);
13563   } else {
13564     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
13565                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
13566                        OriginalLoad->getAAInfo());
13567     Chain = Load.getValue(1);
13568     if (ResultVT.bitsLT(VecEltVT))
13569       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
13570     else
13571       Load = DAG.getBitcast(ResultVT, Load);
13572   }
13573   WorklistRemover DeadNodes(*this);
13574   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
13575   SDValue To[] = { Load, Chain };
13576   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
13577   // Since we're explicitly calling ReplaceAllUses, add the new node to the
13578   // worklist explicitly as well.
13579   AddToWorklist(Load.getNode());
13580   AddUsersToWorklist(Load.getNode()); // Add users too
13581   // Make sure to revisit this node to clean it up; it will usually be dead.
13582   AddToWorklist(EVE);
13583   ++OpsNarrowed;
13584   return SDValue(EVE, 0);
13585 }
13586
13587 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
13588   // (vextract (scalar_to_vector val, 0) -> val
13589   SDValue InVec = N->getOperand(0);
13590   EVT VT = InVec.getValueType();
13591   EVT NVT = N->getValueType(0);
13592
13593   if (InVec.isUndef())
13594     return DAG.getUNDEF(NVT);
13595
13596   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
13597     // Check if the result type doesn't match the inserted element type. A
13598     // SCALAR_TO_VECTOR may truncate the inserted element and the
13599     // EXTRACT_VECTOR_ELT may widen the extracted vector.
13600     SDValue InOp = InVec.getOperand(0);
13601     if (InOp.getValueType() != NVT) {
13602       assert(InOp.getValueType().isInteger() && NVT.isInteger());
13603       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
13604     }
13605     return InOp;
13606   }
13607
13608   SDValue EltNo = N->getOperand(1);
13609   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
13610
13611   // extract_vector_elt (build_vector x, y), 1 -> y
13612   if (ConstEltNo &&
13613       InVec.getOpcode() == ISD::BUILD_VECTOR &&
13614       TLI.isTypeLegal(VT) &&
13615       (InVec.hasOneUse() ||
13616        TLI.aggressivelyPreferBuildVectorSources(VT))) {
13617     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
13618     EVT InEltVT = Elt.getValueType();
13619
13620     // Sometimes build_vector's scalar input types do not match result type.
13621     if (NVT == InEltVT)
13622       return Elt;
13623
13624     // TODO: It may be useful to truncate if free if the build_vector implicitly
13625     // converts.
13626   }
13627
13628   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
13629   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
13630       ConstEltNo->isNullValue() && VT.isInteger()) {
13631     SDValue BCSrc = InVec.getOperand(0);
13632     if (BCSrc.getValueType().isScalarInteger())
13633       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
13634   }
13635
13636   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
13637   //
13638   // This only really matters if the index is non-constant since other combines
13639   // on the constant elements already work.
13640   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
13641       EltNo == InVec.getOperand(2)) {
13642     SDValue Elt = InVec.getOperand(1);
13643     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
13644   }
13645
13646   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
13647   // We only perform this optimization before the op legalization phase because
13648   // we may introduce new vector instructions which are not backed by TD
13649   // patterns. For example on AVX, extracting elements from a wide vector
13650   // without using extract_subvector. However, if we can find an underlying
13651   // scalar value, then we can always use that.
13652   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
13653     int NumElem = VT.getVectorNumElements();
13654     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
13655     // Find the new index to extract from.
13656     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
13657
13658     // Extracting an undef index is undef.
13659     if (OrigElt == -1)
13660       return DAG.getUNDEF(NVT);
13661
13662     // Select the right vector half to extract from.
13663     SDValue SVInVec;
13664     if (OrigElt < NumElem) {
13665       SVInVec = InVec->getOperand(0);
13666     } else {
13667       SVInVec = InVec->getOperand(1);
13668       OrigElt -= NumElem;
13669     }
13670
13671     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
13672       SDValue InOp = SVInVec.getOperand(OrigElt);
13673       if (InOp.getValueType() != NVT) {
13674         assert(InOp.getValueType().isInteger() && NVT.isInteger());
13675         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
13676       }
13677
13678       return InOp;
13679     }
13680
13681     // FIXME: We should handle recursing on other vector shuffles and
13682     // scalar_to_vector here as well.
13683
13684     if (!LegalOperations) {
13685       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13686       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
13687                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
13688     }
13689   }
13690
13691   bool BCNumEltsChanged = false;
13692   EVT ExtVT = VT.getVectorElementType();
13693   EVT LVT = ExtVT;
13694
13695   // If the result of load has to be truncated, then it's not necessarily
13696   // profitable.
13697   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
13698     return SDValue();
13699
13700   if (InVec.getOpcode() == ISD::BITCAST) {
13701     // Don't duplicate a load with other uses.
13702     if (!InVec.hasOneUse())
13703       return SDValue();
13704
13705     EVT BCVT = InVec.getOperand(0).getValueType();
13706     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
13707       return SDValue();
13708     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
13709       BCNumEltsChanged = true;
13710     InVec = InVec.getOperand(0);
13711     ExtVT = BCVT.getVectorElementType();
13712   }
13713
13714   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
13715   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
13716       ISD::isNormalLoad(InVec.getNode()) &&
13717       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
13718     SDValue Index = N->getOperand(1);
13719     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
13720       if (!OrigLoad->isVolatile()) {
13721         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
13722                                                              OrigLoad);
13723       }
13724     }
13725   }
13726
13727   // Perform only after legalization to ensure build_vector / vector_shuffle
13728   // optimizations have already been done.
13729   if (!LegalOperations) return SDValue();
13730
13731   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
13732   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
13733   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
13734
13735   if (ConstEltNo) {
13736     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13737
13738     LoadSDNode *LN0 = nullptr;
13739     const ShuffleVectorSDNode *SVN = nullptr;
13740     if (ISD::isNormalLoad(InVec.getNode())) {
13741       LN0 = cast<LoadSDNode>(InVec);
13742     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13743                InVec.getOperand(0).getValueType() == ExtVT &&
13744                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
13745       // Don't duplicate a load with other uses.
13746       if (!InVec.hasOneUse())
13747         return SDValue();
13748
13749       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
13750     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
13751       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
13752       // =>
13753       // (load $addr+1*size)
13754
13755       // Don't duplicate a load with other uses.
13756       if (!InVec.hasOneUse())
13757         return SDValue();
13758
13759       // If the bit convert changed the number of elements, it is unsafe
13760       // to examine the mask.
13761       if (BCNumEltsChanged)
13762         return SDValue();
13763
13764       // Select the input vector, guarding against out of range extract vector.
13765       unsigned NumElems = VT.getVectorNumElements();
13766       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
13767       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
13768
13769       if (InVec.getOpcode() == ISD::BITCAST) {
13770         // Don't duplicate a load with other uses.
13771         if (!InVec.hasOneUse())
13772           return SDValue();
13773
13774         InVec = InVec.getOperand(0);
13775       }
13776       if (ISD::isNormalLoad(InVec.getNode())) {
13777         LN0 = cast<LoadSDNode>(InVec);
13778         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
13779         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
13780       }
13781     }
13782
13783     // Make sure we found a non-volatile load and the extractelement is
13784     // the only use.
13785     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
13786       return SDValue();
13787
13788     // If Idx was -1 above, Elt is going to be -1, so just return undef.
13789     if (Elt == -1)
13790       return DAG.getUNDEF(LVT);
13791
13792     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
13793   }
13794
13795   return SDValue();
13796 }
13797
13798 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
      //
      // N is a BUILD_VECTOR. If every non-undef operand is an ANY_EXTEND or a
      // ZERO_EXTEND of a value of one common source type, a BUILD_VECTOR of that
      // narrower source type is created (with ElemRatio times as many elements)
      // and bitcast back to N's type. An empty SDValue is returned when the
      // combine level, the operand pattern, the scalar sizes or the legality of
      // the new vector type rule the transform out.
13799 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
13800   // We perform this optimization post type-legalization because
13801   // the type-legalizer often scalarizes integer-promoted vectors.
13802   // Performing this optimization before may create bit-casts which
13803   // will be type-legalized to complex code sequences.
13804   // We perform this optimization only before the operation legalizer because we
13805   // may introduce illegal operations.
13806   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
13807     return SDValue();
13808
13809   unsigned NumInScalars = N->getNumOperands();
13810   SDLoc DL(N);
13811   EVT VT = N->getValueType(0);
13812
13813   // Check to see if this is a BUILD_VECTOR of a bunch of values
13814   // which come from any_extend or zero_extend nodes. If so, we can create
13815   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
13816   // optimizations. We do not handle sign-extend because we can't fill the sign
13817   // using shuffles.
        // SourceType doubles as the "failed / nothing seen yet" marker while it
        // is MVT::Other.
13818   EVT SourceType = MVT::Other;
13819   bool AllAnyExt = true;
13820
13821   for (unsigned i = 0; i != NumInScalars; ++i) {
13822     SDValue In = N->getOperand(i);
13823     // Ignore undef inputs.
13824     if (In.isUndef()) continue;
13825
13826     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
13827     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
13828
13829     // Abort if the element is not an extension.
13830     if (!ZeroExt && !AnyExt) {
13831       SourceType = MVT::Other;
13832       break;
13833     }
13834
13835     // The input is a ZeroExt or AnyExt. Check the original type.
13836     EVT InTy = In.getOperand(0).getValueType();
13837
13838     // Check that all of the widened source types are the same.
13839     if (SourceType == MVT::Other)
13840       // First time.
13841       SourceType = InTy;
13842     else if (InTy != SourceType) {
13843       // Multiple income types. Abort.
13844       SourceType = MVT::Other;
13845       break;
13846     }
13847
13848     // Check if all of the extends are ANY_EXTENDs.
13849     AllAnyExt &= AnyExt;
13850   }
13851
13852   // In order to have valid types, all of the inputs must be extended from the
13853   // same source type and all of the inputs must be any or zero extend.
13854   // Scalar sizes must be a power of two.
13855   EVT OutScalarTy = VT.getScalarType();
13856   bool ValidTypes = SourceType != MVT::Other &&
13857                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
13858                  isPowerOf2_32(SourceType.getSizeInBits());
13859
13860   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
13861   // turn into a single shuffle instruction.
13862   if (!ValidTypes)
13863     return SDValue();
13864
13865   bool isLE = DAG.getDataLayout().isLittleEndian();
        // Number of source-type slots that make up one output element.
13866   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
13867   assert(ElemRatio > 1 && "Invalid element size ratio");
        // Slots that are not overwritten below keep this value: undef when every
        // extend was an ANY_EXTEND, zero as soon as one ZERO_EXTEND was seen.
13868   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
13869                                DAG.getConstant(0, DL, SourceType);
13870
13871   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
13872   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
13873
13874   // Populate the new build_vector
13875   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13876     SDValue Cast = N->getOperand(i);
13877     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
13878             Cast.getOpcode() == ISD::ZERO_EXTEND ||
13879             Cast.isUndef()) && "Invalid cast opcode");
13880     SDValue In;
13881     if (Cast.isUndef())
13882       In = DAG.getUNDEF(SourceType);
13883     else
13884       In = Cast->getOperand(0);
          // The un-extended value goes into the first of the ElemRatio slots of
          // element i on little-endian targets and into the last one otherwise.
13885     unsigned Index = isLE ? (i * ElemRatio) :
13886                             (i * ElemRatio + (ElemRatio - 1));
13887
13888     assert(Index < Ops.size() && "Invalid index");
13889     Ops[Index] = In;
13890   }
13891
13892   // The type of the new BUILD_VECTOR node.
13893   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
13894   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
13895          "Invalid vector size");
13896   // Check if the new vector type is legal.
13897   if (!isTypeLegal(VecVT)) return SDValue();
13898
13899   // Make the new BUILD_VECTOR.
13900   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
13901
13902   // The new BUILD_VECTOR node has the potential to be further optimized.
13903   AddToWorklist(BV.getNode());
13904   // Bitcast to the desired type.
13905   return DAG.getBitcast(VT, BV);
13906 }
13907
      // Turn a BUILD_VECTOR of scalar integer-to-float conversions into one
      // vector conversion of a BUILD_VECTOR of the integer sources.
      //
      // All non-undef operands of N must use the same opcode (UINT_TO_FP or
      // SINT_TO_FP) and convert from the same source type, and at least two of
      // them must be defined. An empty SDValue is returned when the pattern does
      // not match, or when the vector conversion or the source vector type is
      // not accepted by the target.
13908 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
13909   EVT VT = N->getValueType(0);
13910
13911   unsigned NumInScalars = N->getNumOperands();
13912   SDLoc DL(N);
13913
        // Source scalar type and conversion opcode shared by all defined
        // operands; MVT::Other / DELETED_NODE mean "not seen yet".
13914   EVT SrcVT = MVT::Other;
13915   unsigned Opcode = ISD::DELETED_NODE;
        // Number of non-undef operands.
13916   unsigned NumDefs = 0;
13917
13918   for (unsigned i = 0; i != NumInScalars; ++i) {
13919     SDValue In = N->getOperand(i);
13920     unsigned Opc = In.getOpcode();
13921
13922     if (Opc == ISD::UNDEF)
13923       continue;
13924
13925     // If all scalar values are floats and converted from integers.
13926     if (Opcode == ISD::DELETED_NODE &&
13927         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
13928       Opcode = Opc;
13929     }
13930
          // Every defined operand has to use the opcode recorded above. This also
          // rejects a first defined operand that is not a conversion, because
          // Opcode is still DELETED_NODE in that case.
13931     if (Opc != Opcode)
13932       return SDValue();
13933
13934     EVT InVT = In.getOperand(0).getValueType();
13935
13936     // If all scalar values are typed differently, bail out. It's chosen to
13937     // simplify BUILD_VECTOR of integer types.
13938     if (SrcVT == MVT::Other)
13939       SrcVT = InVT;
13940     if (SrcVT != InVT)
13941       return SDValue();
13942     NumDefs++;
13943   }
13944
13945   // If the vector has just one element defined, it's not worth to fold it into
13946   // a vectorized one.
13947   if (NumDefs < 2)
13948     return SDValue();
13949
13950   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
13951          && "Should only handle conversion from integer to float.");
13952   assert(SrcVT != MVT::Other && "Cannot determine source type!");
13953
        // Vector of the source scalar type with one element per operand of N.
13954   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
13955
13956   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
13957     return SDValue();
13958
13959   // Just because the floating-point vector type is legal does not necessarily
13960   // mean that the corresponding integer vector type is.
13961   if (!isTypeLegal(NVT))
13962     return SDValue();
13963
        // Collect the conversion inputs; undef operands stay undef, but in the
        // source type.
13964   SmallVector<SDValue, 8> Opnds;
13965   for (unsigned i = 0; i != NumInScalars; ++i) {
13966     SDValue In = N->getOperand(i);
13967
13968     if (In.isUndef())
13969       Opnds.push_back(DAG.getUNDEF(SrcVT));
13970     else
13971       Opnds.push_back(In.getOperand(0));
13972   }
13973   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
13974   AddToWorklist(BV.getNode());
13975
        // Apply the single vector conversion to the new BUILD_VECTOR.
13976   return DAG.getNode(Opcode, DL, VT, BV);
13977 }
13978
      // Build the shuffle that places the elements of BUILD_VECTOR N which are
      // extracted from VecIn1 / VecIn2 into their positions in N's type.
      //
      // DL         - location used for all nodes created here.
      // N          - the BUILD_VECTOR; for each selected element i, operand 1 of
      //              N's operand i is read as the constant extract index.
      // VectorMask - for each element of N, the number of the input vector it
      //              comes from; entries <= 0 are skipped here.
      // VecIn1     - the input vector whose number is LeftIdx.
      // VecIn2     - the input vector whose number is LeftIdx + 1; may be an
      //              empty SDValue.
      // LeftIdx    - vector number of VecIn1 in VectorMask.
      //
      // Returns a value of N's type, or an empty SDValue when the input types
      // cannot be made to match the output type.
13979 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
13980                                            ArrayRef<int> VectorMask,
13981                                            SDValue VecIn1, SDValue VecIn2,
13982                                            unsigned LeftIdx) {
13983   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13984   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
13985
13986   EVT VT = N->getValueType(0);
13987   EVT InVT1 = VecIn1.getValueType();
13988   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
13989
        // Vec2Offset is added to the mask index of elements taken from the second
        // input. ShuffleNumElems is the length of the shuffle mask, which can
        // differ from NumElems when the shuffle is done at a wider type.
13990   unsigned Vec2Offset = InVT1.getVectorNumElements();
13991   unsigned NumElems = VT.getVectorNumElements();
13992   unsigned ShuffleNumElems = NumElems;
13993
13994   // We can't generate a shuffle node with mismatched input and output types.
13995   // Try to make the types match the type of the output.
13996   if (InVT1 != VT || InVT2 != VT) {
13997     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
13998       // If the output vector length is a multiple of both input lengths,
13999       // we can concatenate them and pad the rest with undefs.
14000       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
14001       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
14002       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
14003       ConcatOps[0] = VecIn1;
14004       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
            // Both inputs now live in a single VT-sized vector, so the shuffle
            // gets no second input; Vec2Offset keeps the element count of the
            // original VecIn1, which is where VecIn2 starts in the concat.
14005       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14006       VecIn2 = SDValue();
14007     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
14008       if (!TLI.isExtractSubvectorCheap(VT, NumElems))
14009         return SDValue();
14010
14011       if (!VecIn2.getNode()) {
14012         // If we only have one input vector, and it's twice the size of the
14013         // output, split it in two.
14014         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
14015                              DAG.getConstant(NumElems, DL, IdxTy));
14016         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
14017         // Since we now have shorter input vectors, adjust the offset of the
14018         // second vector's start.
14019         Vec2Offset = NumElems;
14020       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
14021         // VecIn1 is wider than the output, and we have another, possibly
14022         // smaller input. Pad the smaller input with undefs, shuffle at the
14023         // input vector width, and extract the output.
14024         // The shuffle type is different than VT, so check legality again.
14025         if (LegalOperations &&
14026             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
14027           return SDValue();
14028
14029         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
14030         // lower it back into a BUILD_VECTOR. So if the inserted type is
14031         // illegal, don't even try.
14032         if (InVT1 != InVT2) {
14033           if (!TLI.isTypeLegal(InVT2))
14034             return SDValue();
14035           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
14036                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
14037         }
              // The shuffle is performed at VecIn1's width (twice the output).
14038         ShuffleNumElems = NumElems * 2;
14039       } else {
14040         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
14041         // than VecIn1. We can't handle this for now - this case will disappear
14042         // when we start sorting the vectors by type.
14043         return SDValue();
14044       }
14045     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
14046                InVT1.getSizeInBits() == VT.getSizeInBits()) {
            // VecIn1 already has the output size and VecIn2 has half of it:
            // widen VecIn2 by concatenating it with an undef of its own type.
14047       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
14048       ConcatOps[0] = VecIn2;
14049       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14050     } else {
14051       // TODO: Support cases where the length mismatch isn't exactly by a
14052       // factor of 2.
14053       // TODO: Move this check upwards, so that if we have bad type
14054       // mismatches, we don't create any DAG nodes.
14055       return SDValue();
14056     }
14057   }
14058
14059   // Initialize mask to undef.
14060   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
14061
14062   // Only need to run up to the number of elements actually used, not the
14063   // total number of elements in the shuffle - if we are shuffling a wider
14064   // vector, the high lanes should be set to undef.
14065   for (unsigned i = 0; i != NumElems; ++i) {
          // Entries <= 0 do not refer to an input vector of this shuffle.
14066     if (VectorMask[i] <= 0)
14067       continue;
14068
          // Elements that belong to neither LeftIdx nor LeftIdx + 1 are left
          // undef in this shuffle's mask.
14069     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
14070     if (VectorMask[i] == (int)LeftIdx) {
14071       Mask[i] = ExtIndex;
14072     } else if (VectorMask[i] == (int)LeftIdx + 1) {
14073       Mask[i] = Vec2Offset + ExtIndex;
14074     }
14075   }
14076
14077   // The type of the input vectors may have changed above.
14078   InVT1 = VecIn1.getValueType();
14079
14080   // If we already have a VecIn2, it should have the same type as VecIn1.
14081   // If we don't, get an undef/zero vector of the appropriate type.
14082   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
14083   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
14084
14085   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
        // A shuffle done at a wider type is narrowed to VT by taking its low
        // subvector.
14086   if (ShuffleNumElems > NumElems)
14087     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
14088
14089   return Shuffle;
14090 }
14091
14092 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14093 // operations. If the types of the vectors we're extracting from allow it,
14094 // turn this into a vector_shuffle node.
      //
      // Operands of N may also be undef or a null integer/FP constant; anything
      // else makes the function return an empty SDValue. The result is either a
      // single shuffle or the root of a tree of blending shuffles of N's type.
14095 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
14096   SDLoc DL(N);
14097   EVT VT = N->getValueType(0);
14098
14099   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
14100   if (!isTypeLegal(VT))
14101     return SDValue();
14102
14103   // May only combine to shuffle after legalize if shuffle is legal.
14104   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
14105     return SDValue();
14106
14107   bool UsesZeroVector = false;
14108   unsigned NumElems = N->getNumOperands();
14109
14110   // Record, for each element of the newly built vector, which input vector
14111   // that element comes from. -1 stands for undef, 0 for the zero vector,
14112   // and positive values for the input vectors.
14113   // VectorMask maps each element to its vector number, and VecIn maps vector
14114   // numbers to their initial SDValues.
14115
14116   SmallVector<int, 8> VectorMask(NumElems, -1);
14117   SmallVector<SDValue, 8> VecIn;
        // Slot 0 is a placeholder so that real input vectors are numbered from 1,
        // keeping 0 free for the zero vector.
14118   VecIn.push_back(SDValue());
14119
14120   for (unsigned i = 0; i != NumElems; ++i) {
14121     SDValue Op = N->getOperand(i);
14122
14123     if (Op.isUndef())
14124       continue;
14125
14126     // See if we can use a blend with a zero vector.
14127     // TODO: Should we generalize this to a blend with an arbitrary constant
14128     // vector?
14129     if (isNullConstant(Op) || isNullFPConstant(Op)) {
14130       UsesZeroVector = true;
14131       VectorMask[i] = 0;
14132       continue;
14133     }
14134
14135     // Not an undef or zero. If the input is something other than an
14136     // EXTRACT_VECTOR_ELT with a constant index, bail out.
14137     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14138         !isa<ConstantSDNode>(Op.getOperand(1)))
14139       return SDValue();
14140
14141     SDValue ExtractedFromVec = Op.getOperand(0);
14142
14143     // All inputs must have the same element type as the output.
14144     if (VT.getVectorElementType() !=
14145         ExtractedFromVec.getValueType().getVectorElementType())
14146       return SDValue();
14147
14148     // Have we seen this input vector before?
14149     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
14150     // a map back from SDValues to numbers isn't worth it.
14151     unsigned Idx = std::distance(
14152         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
          // Not found: the position one past the end becomes the new vector's
          // number.
14153     if (Idx == VecIn.size())
14154       VecIn.push_back(ExtractedFromVec);
14155
14156     VectorMask[i] = Idx;
14157   }
14158
14159   // If we didn't find at least one input vector, bail out.
14160   if (VecIn.size() < 2)
14161     return SDValue();
14162
14163   // TODO: We want to sort the vectors by descending length, so that adjacent
14164   // pairs have similar length, and the longer vector is always first in the
14165   // pair.
14166
14167   // TODO: Should this fire if some of the input vectors has illegal type (like
14168   // it does now), or should we let legalization run its course first?
14169
14170   // Shuffle phase:
14171   // Take pairs of vectors, and shuffle them so that the result has elements
14172   // from these vectors in the correct places.
14173   // For example, given:
14174   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
14175   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
14176   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
14177   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
14178   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
14179   // We will generate:
14180   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
14181   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
14182   SmallVector<SDValue, 4> Shuffles;
        // Pair In covers vector numbers 2*In+1 and 2*In+2; the right-hand vector
        // is left empty when the last pair has no partner.
14183   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
14184     unsigned LeftIdx = 2 * In + 1;
14185     SDValue VecLeft = VecIn[LeftIdx];
14186     SDValue VecRight =
14187         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
14188
14189     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
14190                                                 VecRight, LeftIdx))
14191       Shuffles.push_back(Shuffle);
14192     else
14193       return SDValue();
14194   }
14195
14196   // If we need the zero vector as an "ingredient" in the blend tree, add it
14197   // to the list of shuffles.
14198   if (UsesZeroVector)
14199     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
14200                                       : DAG.getConstantFP(0.0, DL, VT));
14201
14202   // If we only have one shuffle, we're done.
14203   if (Shuffles.size() == 1)
14204     return Shuffles[0];
14205
14206   // Update the vector mask to point to the post-shuffle vectors.
        // Zero-vector elements map to the last entry of Shuffles (the zero vector
        // pushed above); vector number V maps to the shuffle of its pair,
        // (V - 1) / 2.
14207   for (int &Vec : VectorMask)
14208     if (Vec == 0)
14209       Vec = Shuffles.size() - 1;
14210     else
14211       Vec = (Vec - 1) / 2;
14212
14213   // More than one shuffle. Generate a binary tree of blends, e.g. if from
14214   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
14215   // generate:
14216   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
14217   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
14218   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
14219   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
14220   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
14221   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
14222   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
14223
14224   // Make sure the initial size of the shuffle list is even.
14225   if (Shuffles.size() % 2)
14226     Shuffles.push_back(DAG.getUNDEF(VT));
14227
        // Each pass blends neighbouring pairs in place, writing the result of pair
        // In back to Shuffles[In], so CurSize halves every pass.
14228   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
          // An odd level is padded with undef by overwriting the stale entry just
          // past the live ones; that slot exists because an odd CurSize is
          // always smaller than the even-sized list.
14229     if (CurSize % 2) {
14230       Shuffles[CurSize] = DAG.getUNDEF(VT);
14231       CurSize++;
14232     }
14233     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
14234       int Left = 2 * In;
14235       int Right = 2 * In + 1;
14236       SmallVector<int, 8> Mask(NumElems, -1);
            // Lane i is taken unchanged from whichever side currently holds it
            // and is re-tagged as belonging to the merged shuffle In.
14237       for (unsigned i = 0; i != NumElems; ++i) {
14238         if (VectorMask[i] == Left) {
14239           Mask[i] = i;
14240           VectorMask[i] = In;
14241         } else if (VectorMask[i] == Right) {
14242           Mask[i] = i + NumElems;
14243           VectorMask[i] = In;
14244         }
14245       }
14246
14247       Shuffles[In] =
14248           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
14249     }
14250   }
14251
14252   return Shuffles[0];
14253 }
14254
14255 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14256 // operations which can be matched to a truncate.
      //
      // The pattern accepted is: operand i of N extracts element Stride * i of
      // one common source vector that has Stride * NumElems elements, with
      // Stride a power of two greater than one. The source is then bitcast to a
      // vector of NumElems integers that are Stride times as wide, truncated,
      // and bitcast to N's type. Returns an empty SDValue otherwise.
14257 SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
14258   // TODO: Add support for big-endian.
14259   if (DAG.getDataLayout().isBigEndian())
14260     return SDValue();
14261   if (N->getNumOperands() < 2)
14262     return SDValue();
14263   SDLoc DL(N);
14264   EVT VT = N->getValueType(0);
14265   unsigned NumElems = N->getNumOperands();
14266
14267   if (!isTypeLegal(VT))
14268     return SDValue();
14269
14270   // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
14271   // index, bail out.
14272   // TODO: Allow undef elements in some cases?
        // Operands whose value type differs from VT's element type are rejected
        // as well.
14273   if (any_of(N->ops(), [VT](SDValue Op) {
14274         return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14275                !isa<ConstantSDNode>(Op.getOperand(1)) ||
14276                Op.getValueType() != VT.getVectorElementType();
14277       }))
14278     return SDValue();
14279
14280   // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
14281   auto GetExtractIdx = [](SDValue Extract) {
14282     return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
14283   };
14284
14285   // The first BUILD_VECTOR operand must be an extract from index zero
14286   // (assuming no undef and little-endian).
14287   if (GetExtractIdx(N->getOperand(0)) != 0)
14288     return SDValue();
14289
14290   // Compute the stride from the first index.
        // With operand 0 extracting index 0, the index of operand 1 is the
        // distance between consecutive extracted elements.
14291   int Stride = GetExtractIdx(N->getOperand(1));
14292   SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
14293
14294   // Proceed only if the stride and the types can be matched to a truncate.
        // The last condition keeps the widened integer element (element size
        // times Stride) at 64 bits or less.
14295   if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
14296       (ExtractedFromVec.getValueType().getVectorNumElements() !=
14297        Stride * NumElems) ||
14298       (VT.getScalarSizeInBits() * Stride > 64))
14299     return SDValue();
14300
14301   // Check remaining operands are consistent with the computed stride.
14302   for (unsigned i = 1; i != NumElems; ++i) {
14303     SDValue Op = N->getOperand(i);
14304
14305     if ((Op.getOperand(0) != ExtractedFromVec) ||
14306         (GetExtractIdx(Op) != Stride * i))
14307       return SDValue();
14308   }
14309
14310   // All checks were ok, construct the truncate.
14311   LLVMContext &Ctx = *DAG.getContext();
        // NewVT: NumElems integer elements, each Stride times as wide as an
        // element of VT.
14312   EVT NewVT = VT.getVectorVT(
14313       Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
        // The TRUNCATE is emitted on an integer vector type; a floating-point VT
        // is reached through the final bitcast.
14314   EVT TruncVT =
14315       VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
14316
14317   SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
14318   Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
14319   return DAG.getBitcast(VT, Res);
14320 }
14321
      // Combine entry point for BUILD_VECTOR nodes.
      //
      // Tries, in this order: folding an all-undef vector to undef, expressing
      // the node as its source vector or an EXTRACT_SUBVECTOR of it (only before
      // type legalization), and then the reduceBuildVec* helpers. Returns the
      // replacement value, or an empty SDValue if nothing applied.
14322 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
14323   EVT VT = N->getValueType(0);
14324
14325   // A vector built entirely of undefs is undef.
14326   if (ISD::allOperandsUndef(N))
14327     return DAG.getUNDEF(VT);
14328
14329   // Check if we can express BUILD VECTOR via subvector extract.
14330   if (!LegalTypes && (N->getNumOperands() > 1)) {
14331     SDValue Op0 = N->getOperand(0);
          // Yields the constant extract index of Op when Op is an
          // EXTRACT_VECTOR_ELT from the same vector as Op0, and -1 (converted to
          // uint64_t) otherwise.
14332     auto checkElem = [&](SDValue Op) -> uint64_t {
14333       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
14334           (Op0.getOperand(0) == Op.getOperand(0)))
14335         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
14336           return CNode->getZExtValue();
14337       return -1;
14338     };
14339
          // Offset is the index extracted by operand 0; operand i must extract
          // index Offset + i, otherwise Offset is reset to -1.
          // NOTE(review): Offset is an int while checkElem yields uint64_t, so an
          // index that does not fit in int would be narrowed here - confirm such
          // indices cannot occur.
14340     int Offset = checkElem(Op0);
14341     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
14342       if (Offset + i != checkElem(N->getOperand(i))) {
14343         Offset = -1;
14344         break;
14345       }
14346     }
14347
          // The operands are the leading elements of a vector of the same type as
          // N: the node is that vector.
14348     if ((Offset == 0) &&
14349         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
14350       return Op0.getOperand(0);
          // Otherwise extract the run as a subvector, reusing the index operand
          // of the first extract as the subvector index.
14351     if ((Offset != -1) &&
14352         ((Offset % N->getValueType(0).getVectorNumElements()) ==
14353          0)) // IDX must be multiple of output size.
14354       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
14355                          Op0.getOperand(0), Op0.getOperand(1));
14356   }
14357
14358   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
14359     return V;
14360
14361   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
14362     return V;
14363
        // The truncate form is only attempted when the target asks for it.
14364   if (TLI.isDesirableToCombineBuildVectorToTruncate())
14365     if (SDValue V = reduceBuildVecToTrunc(N))
14366       return V;
14367
14368   if (SDValue V = reduceBuildVecToShuffle(N))
14369     return V;
14370
14371   return SDValue();
14372 }
14373
      // Fold a CONCAT_VECTORS whose operands are all either undef or bitcasts of
      // non-vector values into a BUILD_VECTOR of those scalars, bitcast to the
      // result type.
      //
      // Nothing is done (empty SDValue) when the operand vector type is legal,
      // when an operand has another form, or when a scalar is neither integer
      // nor floating point.
14374 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14375   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14376   EVT OpVT = N->getOperand(0).getValueType();
14377
14378   // If the operands are legal vectors, leave them alone.
14379   if (TLI.isTypeLegal(OpVT))
14380     return SDValue();
14381
14382   SDLoc DL(N);
14383   EVT VT = N->getValueType(0);
14384   SmallVector<SDValue, 8> Ops;
14385
        // Start with an integer scalar type as wide as one operand vector; undef
        // operands are represented by an undef of that type.
14386   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14387   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14388
14389   // Keep track of what we encounter.
14390   bool AnyInteger = false;
14391   bool AnyFP = false;
14392   for (const SDValue &Op : N->ops()) {
          // Take the scalar behind a bitcast, or the scalar undef for an undef
          // operand; any other operand stops the fold.
14393     if (ISD::BITCAST == Op.getOpcode() &&
14394         !Op.getOperand(0).getValueType().isVector())
14395       Ops.push_back(Op.getOperand(0));
14396     else if (ISD::UNDEF == Op.getOpcode())
14397       Ops.push_back(ScalarUndef);
14398     else
14399       return SDValue();
14400
14401     // Note whether we encounter an integer or floating point scalar.
14402     // If it's neither, bail out, it could be something weird like x86mmx.
14403     EVT LastOpVT = Ops.back().getValueType();
14404     if (LastOpVT.isFloatingPoint())
14405       AnyFP = true;
14406     else if (LastOpVT.isInteger())
14407       AnyInteger = true;
14408     else
14409       return SDValue();
14410   }
14411
14412   // If any of the operands is a floating point scalar bitcast to a vector,
14413   // use floating point types throughout, and bitcast everything.
14414   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14415   if (AnyFP) {
14416     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14417     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
          // Only a mix of integer and FP scalars needs fixing up; entries that
          // already have the FP type are kept as they are.
14418     if (AnyInteger) {
14419       for (SDValue &Op : Ops) {
14420         if (Op.getValueType() == SVT)
14421           continue;
14422         if (Op.isUndef())
14423           Op = ScalarUndef;
14424         else
14425           Op = DAG.getBitcast(SVT, Op);
14426       }
14427     }
14428   }
14429
        // The scalar vector has as many elements as SVT fits into the result.
14430   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14431                                VT.getSizeInBits() / SVT.getSizeInBits());
14432   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14433 }
14434
14435 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
14436 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
14437 // most two distinct vectors the same size as the result, attempt to turn this
14438 // into a legal shuffle.
      //
      // Bitcasts around the operands and around the extracted-from vectors are
      // looked through, and undef operands become undef mask entries. Returns an
      // empty SDValue when the pattern does not match or the target rejects the
      // resulting shuffle mask.
14439 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
14440   EVT VT = N->getValueType(0);
14441   EVT OpVT = N->getOperand(0).getValueType();
14442   int NumElts = VT.getVectorNumElements();
14443   int NumOpElts = OpVT.getVectorNumElements();
14444
        // The two shuffle inputs; an undef value marks a slot that is still free.
14445   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
        // Receives NumOpElts entries per operand of N.
14446   SmallVector<int, 8> Mask;
14447
14448   for (SDValue Op : N->ops()) {
14449     // Peek through any bitcast.
14450     while (Op.getOpcode() == ISD::BITCAST)
14451       Op = Op.getOperand(0);
14452
14453     // UNDEF nodes convert to UNDEF shuffle mask values.
14454     if (Op.isUndef()) {
14455       Mask.append((unsigned)NumOpElts, -1);
14456       continue;
14457     }
14458
14459     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14460       return SDValue();
14461
14462     // What vector are we extracting the subvector from and at what index?
14463     SDValue ExtVec = Op.getOperand(0);
14464
14465     // We want the EVT of the original extraction to correctly scale the
14466     // extraction index.
14467     EVT ExtVT = ExtVec.getValueType();
14468
14469     // Peek through any bitcast.
14470     while (ExtVec.getOpcode() == ISD::BITCAST)
14471       ExtVec = ExtVec.getOperand(0);
14472
14473     // UNDEF nodes convert to UNDEF shuffle mask values.
14474     if (ExtVec.isUndef()) {
14475       Mask.append((unsigned)NumOpElts, -1);
14476       continue;
14477     }
14478
14479     if (!isa<ConstantSDNode>(Op.getOperand(1)))
14480       return SDValue();
14481     int ExtIdx = Op.getConstantOperandVal(1);
14482
14483     // Ensure that we are extracting a subvector from a vector the same
14484     // size as the result.
14485     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
14486       return SDValue();
14487
14488     // Scale the subvector index to account for any bitcast.
          // ExtIdx counts elements of ExtVT; convert it to elements of VT using
          // the ratio of the two element counts.
14489     int NumExtElts = ExtVT.getVectorNumElements();
14490     if (0 == (NumExtElts % NumElts))
14491       ExtIdx /= (NumExtElts / NumElts);
14492     else if (0 == (NumElts % NumExtElts))
14493       ExtIdx *= (NumElts / NumExtElts);
14494     else
14495       return SDValue();
14496
14497     // At most we can reference 2 inputs in the final shuffle.
          // Mask entries for the second input are offset by NumElts.
14498     if (SV0.isUndef() || SV0 == ExtVec) {
14499       SV0 = ExtVec;
14500       for (int i = 0; i != NumOpElts; ++i)
14501         Mask.push_back(i + ExtIdx);
14502     } else if (SV1.isUndef() || SV1 == ExtVec) {
14503       SV1 = ExtVec;
14504       for (int i = 0; i != NumOpElts; ++i)
14505         Mask.push_back(i + ExtIdx + NumElts);
14506     } else {
14507       return SDValue();
14508     }
14509   }
14510
14511   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
14512     return SDValue();
14513
        // Both inputs are bitcast to VT before being shuffled.
14514   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
14515                               DAG.getBitcast(VT, SV1), Mask);
14516 }
14517
14518 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
14519   // If we only have one input vector, we don't need to do any concatenation.
14520   if (N->getNumOperands() == 1)
14521     return N->getOperand(0);
14522
14523   // Check if all of the operands are undefs.
14524   EVT VT = N->getValueType(0);
14525   if (ISD::allOperandsUndef(N))
14526     return DAG.getUNDEF(VT);
14527
14528   // Optimize concat_vectors where all but the first of the vectors are undef.
14529   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
14530         return Op.isUndef();
14531       })) {
14532     SDValue In = N->getOperand(0);
14533     assert(In.getValueType().isVector() && "Must concat vectors");
14534
14535     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
14536     if (In->getOpcode() == ISD::BITCAST &&
14537         !In->getOperand(0)->getValueType(0).isVector()) {
14538       SDValue Scalar = In->getOperand(0);
14539
14540       // If the bitcast type isn't legal, it might be a trunc of a legal type;
14541       // look through the trunc so we can still do the transform:
14542       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
14543       if (Scalar->getOpcode() == ISD::TRUNCATE &&
14544           !TLI.isTypeLegal(Scalar.getValueType()) &&
14545           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
14546         Scalar = Scalar->getOperand(0);
14547
14548       EVT SclTy = Scalar->getValueType(0);
14549
14550       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
14551         return SDValue();
14552
14553       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
14554       if (VNTNumElms < 2)
14555         return SDValue();
14556
14557       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
14558       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
14559         return SDValue();
14560
14561       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
14562       return DAG.getBitcast(VT, Res);
14563     }
14564   }
14565
14566   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
14567   // We have already tested above for an UNDEF only concatenation.
14568   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
14569   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
14570   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
14571     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
14572   };
14573   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
14574     SmallVector<SDValue, 8> Opnds;
14575     EVT SVT = VT.getScalarType();
14576
14577     EVT MinVT = SVT;
14578     if (!SVT.isFloatingPoint()) {
14579       // If BUILD_VECTOR are from built from integer, they may have different
14580       // operand types. Get the smallest type and truncate all operands to it.
14581       bool FoundMinVT = false;
14582       for (const SDValue &Op : N->ops())
14583         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14584           EVT OpSVT = Op.getOperand(0)->getValueType(0);
14585           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
14586           FoundMinVT = true;
14587         }
14588       assert(FoundMinVT && "Concat vector type mismatch");
14589     }
14590
14591     for (const SDValue &Op : N->ops()) {
14592       EVT OpVT = Op.getValueType();
14593       unsigned NumElts = OpVT.getVectorNumElements();
14594
14595       if (ISD::UNDEF == Op.getOpcode())
14596         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
14597
14598       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14599         if (SVT.isFloatingPoint()) {
14600           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
14601           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
14602         } else {
14603           for (unsigned i = 0; i != NumElts; ++i)
14604             Opnds.push_back(
14605                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
14606         }
14607       }
14608     }
14609
14610     assert(VT.getVectorNumElements() == Opnds.size() &&
14611            "Concat vector type mismatch");
14612     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14613   }
14614
14615   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
14616   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
14617     return V;
14618
14619   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
14620   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
14621     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
14622       return V;
14623
14624   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
14625   // nodes often generate nop CONCAT_VECTOR nodes.
14626   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
14627   // place the incoming vectors at the exact same location.
14628   SDValue SingleSource = SDValue();
14629   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
14630
14631   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14632     SDValue Op = N->getOperand(i);
14633
14634     if (Op.isUndef())
14635       continue;
14636
14637     // Check if this is the identity extract:
14638     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14639       return SDValue();
14640
14641     // Find the single incoming vector for the extract_subvector.
14642     if (SingleSource.getNode()) {
14643       if (Op.getOperand(0) != SingleSource)
14644         return SDValue();
14645     } else {
14646       SingleSource = Op.getOperand(0);
14647
14648       // Check the source type is the same as the type of the result.
14649       // If not, this concat may extend the vector, so we can not
14650       // optimize it away.
14651       if (SingleSource.getValueType() != N->getValueType(0))
14652         return SDValue();
14653     }
14654
14655     unsigned IdentityIndex = i * PartNumElem;
14656     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
14657     // The extract index must be constant.
14658     if (!CS)
14659       return SDValue();
14660
14661     // Check that we are reading from the identity index.
14662     if (CS->getZExtValue() != IdentityIndex)
14663       return SDValue();
14664   }
14665
14666   if (SingleSource.getNode())
14667     return SingleSource;
14668
14669   return SDValue();
14670 }
14671
14672 /// If we are extracting a subvector produced by a wide binary operator with at
14673 /// at least one operand that was the result of a vector concatenation, then try
14674 /// to use the narrow vector operands directly to avoid the concatenation and
14675 /// extraction.
14676 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
14677   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
14678   // some of these bailouts with other transforms.
14679
14680   // The extract index must be a constant, so we can map it to a concat operand.
14681   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14682   if (!ExtractIndex)
14683     return SDValue();
14684
14685   // Only handle the case where we are doubling and then halving. A larger ratio
14686   // may require more than two narrow binops to replace the wide binop.
14687   EVT VT = Extract->getValueType(0);
14688   unsigned NumElems = VT.getVectorNumElements();
14689   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
14690          "Extract index is not a multiple of the vector length.");
14691   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
14692     return SDValue();
14693
14694   // We are looking for an optionally bitcasted wide vector binary operator
14695   // feeding an extract subvector.
14696   SDValue BinOp = Extract->getOperand(0);
14697   if (BinOp.getOpcode() == ISD::BITCAST)
14698     BinOp = BinOp.getOperand(0);
14699
14700   // TODO: The motivating case for this transform is an x86 AVX1 target. That
14701   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
14702   // flavors, but no other 256-bit integer support. This could be extended to
14703   // handle any binop, but that may require fixing/adding other folds to avoid
14704   // codegen regressions.
14705   unsigned BOpcode = BinOp.getOpcode();
14706   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
14707     return SDValue();
14708
14709   // The binop must be a vector type, so we can chop it in half.
14710   EVT WideBVT = BinOp.getValueType();
14711   if (!WideBVT.isVector())
14712     return SDValue();
14713
14714   // Bail out if the target does not support a narrower version of the binop.
14715   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
14716                                    WideBVT.getVectorNumElements() / 2);
14717   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14718   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
14719     return SDValue();
14720
14721   // Peek through bitcasts of the binary operator operands if needed.
14722   SDValue LHS = BinOp.getOperand(0);
14723   if (LHS.getOpcode() == ISD::BITCAST)
14724     LHS = LHS.getOperand(0);
14725
14726   SDValue RHS = BinOp.getOperand(1);
14727   if (RHS.getOpcode() == ISD::BITCAST)
14728     RHS = RHS.getOperand(0);
14729
14730   // We need at least one concatenation operation of a binop operand to make
14731   // this transform worthwhile. The concat must double the input vector sizes.
14732   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
14733   bool ConcatL =
14734       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
14735   bool ConcatR =
14736       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
14737   if (!ConcatL && !ConcatR)
14738     return SDValue();
14739
14740   // If one of the binop operands was not the result of a concat, we must
14741   // extract a half-sized operand for our new narrow binop. We can't just reuse
14742   // the original extract index operand because we may have bitcasted.
14743   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
14744   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
14745   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
14746   SDLoc DL(Extract);
14747
14748   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
14749   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
14750   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
14751   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
14752                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14753                                     BinOp.getOperand(0),
14754                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14755
14756   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
14757                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14758                                     BinOp.getOperand(1),
14759                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14760
14761   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
14762   return DAG.getBitcast(VT, NarrowBinOp);
14763 }
14764
14765 /// If we are extracting a subvector from a wide vector load, convert to a
14766 /// narrow load to eliminate the extraction:
14767 /// (extract_subvector (load wide vector)) --> (load narrow vector)
14768 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
14769   // TODO: Add support for big-endian. The offset calculation must be adjusted.
14770   if (DAG.getDataLayout().isBigEndian())
14771     return SDValue();
14772
14773   // TODO: The one-use check is overly conservative. Check the cost of the
14774   // extract instead or remove that condition entirely.
14775   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
14776   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14777   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
14778       !ExtIdx)
14779     return SDValue();
14780
14781   // The narrow load will be offset from the base address of the old load if
14782   // we are extracting from something besides index 0 (little-endian).
14783   EVT VT = Extract->getValueType(0);
14784   SDLoc DL(Extract);
14785   SDValue BaseAddr = Ld->getOperand(1);
14786   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
14787
14788   // TODO: Use "BaseIndexOffset" to make this more effective.
14789   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
14790   MachineFunction &MF = DAG.getMachineFunction();
14791   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
14792                                                    VT.getStoreSize());
14793   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
14794   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
14795   return NewLd;
14796 }
14797
14798 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
14799   EVT NVT = N->getValueType(0);
14800   SDValue V = N->getOperand(0);
14801
14802   // Extract from UNDEF is UNDEF.
14803   if (V.isUndef())
14804     return DAG.getUNDEF(NVT);
14805
14806   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
14807     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
14808       return NarrowLoad;
14809
14810   // Combine:
14811   //    (extract_subvec (concat V1, V2, ...), i)
14812   // Into:
14813   //    Vi if possible
14814   // Only operand 0 is checked as 'concat' assumes all inputs of the same
14815   // type.
14816   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
14817       isa<ConstantSDNode>(N->getOperand(1)) &&
14818       V->getOperand(0).getValueType() == NVT) {
14819     unsigned Idx = N->getConstantOperandVal(1);
14820     unsigned NumElems = NVT.getVectorNumElements();
14821     assert((Idx % NumElems) == 0 &&
14822            "IDX in concat is not a multiple of the result vector length.");
14823     return V->getOperand(Idx / NumElems);
14824   }
14825
14826   // Skip bitcasting
14827   if (V->getOpcode() == ISD::BITCAST)
14828     V = V.getOperand(0);
14829
14830   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
14831     // Handle only simple case where vector being inserted and vector
14832     // being extracted are of same size.
14833     EVT SmallVT = V->getOperand(1).getValueType();
14834     if (!NVT.bitsEq(SmallVT))
14835       return SDValue();
14836
14837     // Only handle cases where both indexes are constants.
14838     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14839     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
14840
14841     if (InsIdx && ExtIdx) {
14842       // Combine:
14843       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
14844       // Into:
14845       //    indices are equal or bit offsets are equal => V1
14846       //    otherwise => (extract_subvec V1, ExtIdx)
14847       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
14848           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
14849         return DAG.getBitcast(NVT, V->getOperand(1));
14850       return DAG.getNode(
14851           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
14852           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
14853           N->getOperand(1));
14854     }
14855   }
14856
14857   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
14858     return NarrowBOp;
14859
14860   return SDValue();
14861 }
14862
14863 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
14864                                                  SDValue V, SelectionDAG &DAG) {
14865   SDLoc DL(V);
14866   EVT VT = V.getValueType();
14867
14868   switch (V.getOpcode()) {
14869   default:
14870     return V;
14871
14872   case ISD::CONCAT_VECTORS: {
14873     EVT OpVT = V->getOperand(0).getValueType();
14874     int OpSize = OpVT.getVectorNumElements();
14875     SmallBitVector OpUsedElements(OpSize, false);
14876     bool FoundSimplification = false;
14877     SmallVector<SDValue, 4> NewOps;
14878     NewOps.reserve(V->getNumOperands());
14879     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
14880       SDValue Op = V->getOperand(i);
14881       bool OpUsed = false;
14882       for (int j = 0; j < OpSize; ++j)
14883         if (UsedElements[i * OpSize + j]) {
14884           OpUsedElements[j] = true;
14885           OpUsed = true;
14886         }
14887       NewOps.push_back(
14888           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
14889                  : DAG.getUNDEF(OpVT));
14890       FoundSimplification |= Op == NewOps.back();
14891       OpUsedElements.reset();
14892     }
14893     if (FoundSimplification)
14894       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
14895     return V;
14896   }
14897
14898   case ISD::INSERT_SUBVECTOR: {
14899     SDValue BaseV = V->getOperand(0);
14900     SDValue SubV = V->getOperand(1);
14901     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
14902     if (!IdxN)
14903       return V;
14904
14905     int SubSize = SubV.getValueType().getVectorNumElements();
14906     int Idx = IdxN->getZExtValue();
14907     bool SubVectorUsed = false;
14908     SmallBitVector SubUsedElements(SubSize, false);
14909     for (int i = 0; i < SubSize; ++i)
14910       if (UsedElements[i + Idx]) {
14911         SubVectorUsed = true;
14912         SubUsedElements[i] = true;
14913         UsedElements[i + Idx] = false;
14914       }
14915
14916     // Now recurse on both the base and sub vectors.
14917     SDValue SimplifiedSubV =
14918         SubVectorUsed
14919             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
14920             : DAG.getUNDEF(SubV.getValueType());
14921     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
14922     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
14923       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
14924                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
14925     return V;
14926   }
14927   }
14928 }
14929
14930 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
14931                                        SDValue N1, SelectionDAG &DAG) {
14932   EVT VT = SVN->getValueType(0);
14933   int NumElts = VT.getVectorNumElements();
14934   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
14935   for (int M : SVN->getMask())
14936     if (M >= 0 && M < NumElts)
14937       N0UsedElements[M] = true;
14938     else if (M >= NumElts)
14939       N1UsedElements[M - NumElts] = true;
14940
14941   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
14942   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
14943   if (S0 == N0 && S1 == N1)
14944     return SDValue();
14945
14946   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
14947 }
14948
14949 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
14950 // or turn a shuffle of a single concat into simpler shuffle then concat.
14951 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
14952   EVT VT = N->getValueType(0);
14953   unsigned NumElts = VT.getVectorNumElements();
14954
14955   SDValue N0 = N->getOperand(0);
14956   SDValue N1 = N->getOperand(1);
14957   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14958
14959   SmallVector<SDValue, 4> Ops;
14960   EVT ConcatVT = N0.getOperand(0).getValueType();
14961   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
14962   unsigned NumConcats = NumElts / NumElemsPerConcat;
14963
14964   // Special case: shuffle(concat(A,B)) can be more efficiently represented
14965   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
14966   // half vector elements.
14967   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
14968       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
14969                   SVN->getMask().end(), [](int i) { return i == -1; })) {
14970     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
14971                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
14972     N1 = DAG.getUNDEF(ConcatVT);
14973     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
14974   }
14975
14976   // Look at every vector that's inserted. We're looking for exact
14977   // subvector-sized copies from a concatenated vector
14978   for (unsigned I = 0; I != NumConcats; ++I) {
14979     // Make sure we're dealing with a copy.
14980     unsigned Begin = I * NumElemsPerConcat;
14981     bool AllUndef = true, NoUndef = true;
14982     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
14983       if (SVN->getMaskElt(J) >= 0)
14984         AllUndef = false;
14985       else
14986         NoUndef = false;
14987     }
14988
14989     if (NoUndef) {
14990       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
14991         return SDValue();
14992
14993       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
14994         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
14995           return SDValue();
14996
14997       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
14998       if (FirstElt < N0.getNumOperands())
14999         Ops.push_back(N0.getOperand(FirstElt));
15000       else
15001         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
15002
15003     } else if (AllUndef) {
15004       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
15005     } else { // Mixed with general masks and undefs, can't do optimization.
15006       return SDValue();
15007     }
15008   }
15009
15010   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15011 }
15012
15013 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15014 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15015 //
15016 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
15017 // a simplification in some sense, but it isn't appropriate in general: some
15018 // BUILD_VECTORs are substantially cheaper than others. The general case
15019 // of a BUILD_VECTOR requires inserting each element individually (or
15020 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
15021 // all constants is a single constant pool load.  A BUILD_VECTOR where each
15022 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
15023 // are undef lowers to a small number of element insertions.
15024 //
15025 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
15026 // We don't fold shuffles where one side is a non-zero constant, and we don't
15027 // fold shuffles if the resulting BUILD_VECTOR would have duplicate
15028 // non-constant operands. This seems to work out reasonably well in practice.
15029 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
15030                                        SelectionDAG &DAG,
15031                                        const TargetLowering &TLI) {
15032   EVT VT = SVN->getValueType(0);
15033   unsigned NumElts = VT.getVectorNumElements();
15034   SDValue N0 = SVN->getOperand(0);
15035   SDValue N1 = SVN->getOperand(1);
15036
15037   if (!N0->hasOneUse() || !N1->hasOneUse())
15038     return SDValue();
15039   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
15040   // discussed above.
15041   if (!N1.isUndef()) {
15042     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
15043     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
15044     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
15045       return SDValue();
15046     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
15047       return SDValue();
15048   }
15049
15050   SmallVector<SDValue, 8> Ops;
15051   SmallSet<SDValue, 16> DuplicateOps;
15052   for (int M : SVN->getMask()) {
15053     SDValue Op = DAG.getUNDEF(VT.getScalarType());
15054     if (M >= 0) {
15055       int Idx = M < (int)NumElts ? M : M - NumElts;
15056       SDValue &S = (M < (int)NumElts ? N0 : N1);
15057       if (S.getOpcode() == ISD::BUILD_VECTOR) {
15058         Op = S.getOperand(Idx);
15059       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15060         if (Idx == 0)
15061           Op = S.getOperand(0);
15062       } else {
15063         // Operand can't be combined - bail out.
15064         return SDValue();
15065       }
15066     }
15067
15068     // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
15069     // fine, but it's likely to generate low-quality code if the target can't
15070     // reconstruct an appropriate shuffle.
15071     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
15072       if (!DuplicateOps.insert(Op).second)
15073         return SDValue();
15074
15075     Ops.push_back(Op);
15076   }
15077   // BUILD_VECTOR requires all inputs to be of the same type, find the
15078   // maximum type and extend them all.
15079   EVT SVT = VT.getScalarType();
15080   if (SVT.isInteger())
15081     for (SDValue &Op : Ops)
15082       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
15083   if (SVT != VT.getScalarType())
15084     for (SDValue &Op : Ops)
15085       Op = TLI.isZExtFree(Op.getValueType(), SVT)
15086                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
15087                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
15088   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
15089 }
15090
15091 // Match shuffles that can be converted to any_vector_extend_in_reg.
15092 // This is often generated during legalization.
15093 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15094 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15095 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15096                                             SelectionDAG &DAG,
15097                                             const TargetLowering &TLI,
15098                                             bool LegalOperations) {
15099   EVT VT = SVN->getValueType(0);
15100   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15101
15102   // TODO Add support for big-endian when we have a test case.
15103   if (!VT.isInteger() || IsBigEndian)
15104     return SDValue();
15105
15106   unsigned NumElts = VT.getVectorNumElements();
15107   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15108   ArrayRef<int> Mask = SVN->getMask();
15109   SDValue N0 = SVN->getOperand(0);
15110
15111   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15112   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15113     for (unsigned i = 0; i != NumElts; ++i) {
15114       if (Mask[i] < 0)
15115         continue;
15116       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15117         continue;
15118       return false;
15119     }
15120     return true;
15121   };
15122
15123   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15124   // power-of-2 extensions as they are the most likely.
15125   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15126     if (!isAnyExtend(Scale))
15127       continue;
15128
15129     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15130     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15131     if (!LegalOperations ||
15132         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15133       return DAG.getBitcast(VT,
15134                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15135   }
15136
15137   return SDValue();
15138 }
15139
15140 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15141 // each source element of a large type into the lowest elements of a smaller
15142 // destination type. This is often generated during legalization.
15143 // If the source node itself was a '*_extend_vector_inreg' node then we should
15144 // then be able to remove it.
15145 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15146                                         SelectionDAG &DAG) {
15147   EVT VT = SVN->getValueType(0);
15148   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15149
15150   // TODO Add support for big-endian when we have a test case.
15151   if (!VT.isInteger() || IsBigEndian)
15152     return SDValue();
15153
15154   SDValue N0 = SVN->getOperand(0);
15155   while (N0.getOpcode() == ISD::BITCAST)
15156     N0 = N0.getOperand(0);
15157
15158   unsigned Opcode = N0.getOpcode();
15159   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15160       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15161       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15162     return SDValue();
15163
15164   SDValue N00 = N0.getOperand(0);
15165   ArrayRef<int> Mask = SVN->getMask();
15166   unsigned NumElts = VT.getVectorNumElements();
15167   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15168   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15169   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
15170
15171   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
15172     return SDValue();
15173   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
15174
15175   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
15176   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15177   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15178   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15179     for (unsigned i = 0; i != NumElts; ++i) {
15180       if (Mask[i] < 0)
15181         continue;
15182       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15183         continue;
15184       return false;
15185     }
15186     return true;
15187   };
15188
15189   // At the moment we just handle the case where we've truncated back to the
15190   // same size as before the extension.
15191   // TODO: handle more extension/truncation cases as cases arise.
15192   if (EltSizeInBits != ExtSrcSizeInBits)
15193     return SDValue();
15194
15195   // We can remove *extend_vector_inreg only if the truncation happens at
15196   // the same scale as the extension.
15197   if (isTruncate(ExtScale))
15198     return DAG.getBitcast(VT, N00);
15199
15200   return SDValue();
15201 }
15202
15203 // Combine shuffles of splat-shuffles of the form:
15204 // shuffle (shuffle V, undef, splat-mask), undef, M
15205 // If splat-mask contains undef elements, we need to be careful about
15206 // introducing undef's in the folded mask which are not the result of composing
15207 // the masks of the shuffles.
15208 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15209                                      ShuffleVectorSDNode *Splat,
15210                                      SelectionDAG &DAG) {
15211   ArrayRef<int> SplatMask = Splat->getMask();
15212   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15213
15214   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15215   // every undef mask element in the splat-shuffle has a corresponding undef
15216   // element in the user-shuffle's mask or if the composition of mask elements
15217   // would result in undef.
15218   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15219   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15220   //   In this case it is not legal to simplify to the splat-shuffle because we
15221   //   may be exposing the users of the shuffle an undef element at index 1
15222   //   which was not there before the combine.
15223   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15224   //   In this case the composition of masks yields SplatMask, so it's ok to
15225   //   simplify to the splat-shuffle.
15226   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15227   //   In this case the composed mask includes all undef elements of SplatMask
15228   //   and in addition sets element zero to undef. It is safe to simplify to
15229   //   the splat-shuffle.
15230   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15231                                        ArrayRef<int> SplatMask) {
15232     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15233       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15234           SplatMask[UserMask[i]] != -1)
15235         return false;
15236     return true;
15237   };
15238   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15239     return SDValue(Splat, 0);
15240
15241   // Create a new shuffle with a mask that is composed of the two shuffles'
15242   // masks.
15243   SmallVector<int, 32> NewMask;
15244   for (int Idx : UserMask)
15245     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15246
15247   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15248                               Splat->getOperand(0), Splat->getOperand(1),
15249                               NewMask);
15250 }
15251 /// Try each VECTOR_SHUFFLE combine below in order; returns an empty SDValue if none fires.
15252 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
15253   EVT VT = N->getValueType(0);
15254   unsigned NumElts = VT.getVectorNumElements();
15255
15256   SDValue N0 = N->getOperand(0);
15257   SDValue N1 = N->getOperand(1);
15258
15259   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
15260
15261   // Canonicalize shuffle undef, undef -> undef
15262   if (N0.isUndef() && N1.isUndef())
15263     return DAG.getUNDEF(VT);
15264
15265   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15266
15267   // Canonicalize shuffle v, v -> v, undef
15268   if (N0 == N1) {
15269     SmallVector<int, 8> NewMask;
15270     for (unsigned i = 0; i != NumElts; ++i) {
15271       int Idx = SVN->getMaskElt(i);
15272       if (Idx >= (int)NumElts) Idx -= NumElts; // N1 lane -> same lane of N0
15273       NewMask.push_back(Idx);
15274     }
15275     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
15276   }
15277
15278   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
15279   if (N0.isUndef())
15280     return DAG.getCommutedVectorShuffle(*SVN);
15281
15282   // Remove references to rhs if it is undef
15283   if (N1.isUndef()) {
15284     bool Changed = false;
15285     SmallVector<int, 8> NewMask;
15286     for (unsigned i = 0; i != NumElts; ++i) {
15287       int Idx = SVN->getMaskElt(i);
15288       if (Idx >= (int)NumElts) {
15289         Idx = -1; // this lane read the undef rhs
15290         Changed = true;
15291       }
15292       NewMask.push_back(Idx);
15293     }
15294     if (Changed)
15295       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
15296   }
15297
15298   // A shuffle of a single vector that is a splat can always be folded.
15299   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
15300     if (N1->isUndef() && N0Shuf->isSplat())
15301       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
15302
15303   // If it is a splat, check if the argument vector is another splat or a
15304   // build_vector.
15305   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
15306     SDNode *V = N0.getNode();
15307
15308     // If this is a bit convert that changes the element type of the vector but
15309     // not the number of vector elements, look through it.  Be careful not to
15310     // look through conversions that change things like v4f32 to v2f64.
15311     if (V->getOpcode() == ISD::BITCAST) {
15312       SDValue ConvInput = V->getOperand(0);
15313       if (ConvInput.getValueType().isVector() &&
15314           ConvInput.getValueType().getVectorNumElements() == NumElts)
15315         V = ConvInput.getNode();
15316     }
15317
15318     if (V->getOpcode() == ISD::BUILD_VECTOR) {
15319       assert(V->getNumOperands() == NumElts &&
15320              "BUILD_VECTOR has wrong number of operands");
15321       SDValue Base; // first non-undef operand, if any
15322       bool AllSame = true;
15323       for (unsigned i = 0; i != NumElts; ++i) {
15324         if (!V->getOperand(i).isUndef()) {
15325           Base = V->getOperand(i);
15326           break;
15327         }
15328       }
15329       // Splat of <u, u, u, u>, return <u, u, u, u>
15330       if (!Base.getNode())
15331         return N0;
15332       for (unsigned i = 0; i != NumElts; ++i) {
15333         if (V->getOperand(i) != Base) {
15334           AllSame = false;
15335           break;
15336         }
15337       }
15338       // Splat of <x, x, x, x>, return <x, x, x, x>
15339       if (AllSame)
15340         return N0;
15341
15342       // Canonicalize any other splat as a build_vector.
15343       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
15344       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
15345       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
15346
15347       // We may have jumped through bitcasts, so the type of the
15348       // BUILD_VECTOR may not match the type of the shuffle.
15349       if (V->getValueType(0) != VT)
15350         NewBV = DAG.getBitcast(VT, NewBV);
15351       return NewBV;
15352     }
15353   }
15354
15355   // There are various patterns used to build up a vector from smaller vectors,
15356   // subvectors, or elements. Scan chains of these and replace unused insertions
15357   // or components with undef.
15358   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
15359     return S;
15360
15361   // Match shuffles that can be converted to any_vector_extend_in_reg.
15362   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
15363     return V;
15364
15365   // Combine "truncate_vector_in_reg" style shuffles.
15366   if (SDValue V = combineTruncationShuffle(SVN, DAG))
15367     return V;
15368   // Shuffle of CONCAT_VECTORS: try partitionShuffleOfConcats before vector-op legalization.
15369   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
15370       Level < AfterLegalizeVectorOps &&
15371       (N1.isUndef() ||
15372       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
15373        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
15374     if (SDValue V = partitionShuffleOfConcats(N, DAG))
15375       return V;
15376   }
15377
15378   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15379   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15380   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15381     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
15382       return Res;
15383
15384   // If this shuffle only has a single input that is a bitcasted shuffle,
15385   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
15386   // back to their original types.
15387   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15388       N1.isUndef() && Level < AfterLegalizeVectorOps &&
15389       TLI.isTypeLegal(VT)) {
15390
15391     // Peek through the bitcast only if there is one user.
15392     SDValue BC0 = N0;
15393     while (BC0.getOpcode() == ISD::BITCAST) {
15394       if (!BC0.hasOneUse())
15395         break;
15396       BC0 = BC0.getOperand(0);
15397     }
15398     // Expand every mask entry into Scale consecutive entries; undef (-1) stays undef.
15399     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
15400       if (Scale == 1)
15401         return SmallVector<int, 8>(Mask.begin(), Mask.end());
15402
15403       SmallVector<int, 8> NewMask;
15404       for (int M : Mask)
15405         for (int s = 0; s != Scale; ++s)
15406           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
15407       return NewMask;
15408     };
15409
15410     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
15411       EVT SVT = VT.getScalarType();
15412       EVT InnerVT = BC0->getValueType(0);
15413       EVT InnerSVT = InnerVT.getScalarType();
15414
15415       // Determine which shuffle works with the smaller scalar type.
15416       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
15417       EVT ScaleSVT = ScaleVT.getScalarType();
15418
15419       if (TLI.isTypeLegal(ScaleVT) &&
15420           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
15421           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
15422
15423         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15424         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15425
15426         // Scale the shuffle masks to the smaller scalar type.
15427         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
15428         SmallVector<int, 8> InnerMask =
15429             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
15430         SmallVector<int, 8> OuterMask =
15431             ScaleShuffleMask(SVN->getMask(), OuterScale);
15432
15433         // Merge the shuffle masks.
15434         SmallVector<int, 8> NewMask;
15435         for (int M : OuterMask)
15436           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
15437
15438         // Test for shuffle mask legality over both commutations.
15439         SDValue SV0 = BC0->getOperand(0);
15440         SDValue SV1 = BC0->getOperand(1);
15441         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15442         if (!LegalMask) {
15443           std::swap(SV0, SV1);
15444           ShuffleVectorSDNode::commuteMask(NewMask);
15445           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15446         }
15447
15448         if (LegalMask) {
15449           SV0 = DAG.getBitcast(ScaleVT, SV0);
15450           SV1 = DAG.getBitcast(ScaleVT, SV1);
15451           return DAG.getBitcast(
15452               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
15453         }
15454       }
15455     }
15456   }
15457
15458   // Canonicalize shuffles according to rules:
15459   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
15460   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
15461   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
15462   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
15463       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
15464       TLI.isTypeLegal(VT)) {
15465     // The incoming shuffle must be of the same type as the result of the
15466     // current shuffle.
15467     assert(N1->getOperand(0).getValueType() == VT &&
15468            "Shuffle types don't match");
15469
15470     SDValue SV0 = N1->getOperand(0);
15471     SDValue SV1 = N1->getOperand(1);
15472     bool HasSameOp0 = N0 == SV0;
15473     bool IsSV1Undef = SV1.isUndef();
15474     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
15475       // Commute the operands of this shuffle so that next rule
15476       // will trigger.
15477       return DAG.getCommutedVectorShuffle(*SVN);
15478   }
15479
15480   // Try to fold according to rules:
15481   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15482   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15483   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15484   // Don't try to fold shuffles with illegal type.
15485   // Only fold if this shuffle is the only user of the other shuffle.
15486   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
15487       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
15488     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
15489
15490     // Don't try to fold splats; they're likely to simplify somehow, or they
15491     // might be free.
15492     if (OtherSV->isSplat())
15493       return SDValue();
15494
15495     // The incoming shuffle must be of the same type as the result of the
15496     // current shuffle.
15497     assert(OtherSV->getOperand(0).getValueType() == VT &&
15498            "Shuffle types don't match");
15499
15500     SDValue SV0, SV1; // stay null until a mask element selects them
15501     SmallVector<int, 4> Mask;
15502     // Compute the combined shuffle mask for a shuffle with SV0 as the first
15503     // operand, and SV1 as the second operand.
15504     for (unsigned i = 0; i != NumElts; ++i) {
15505       int Idx = SVN->getMaskElt(i);
15506       if (Idx < 0) {
15507         // Propagate Undef.
15508         Mask.push_back(Idx);
15509         continue;
15510       }
15511
15512       SDValue CurrentVec;
15513       if (Idx < (int)NumElts) {
15514         // This shuffle index refers to the inner shuffle N0. Lookup the inner
15515         // shuffle mask to identify which vector is actually referenced.
15516         Idx = OtherSV->getMaskElt(Idx);
15517         if (Idx < 0) {
15518           // Propagate Undef.
15519           Mask.push_back(Idx);
15520           continue;
15521         }
15522
15523         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
15524                                            : OtherSV->getOperand(1);
15525       } else {
15526         // This shuffle index references an element within N1.
15527         CurrentVec = N1;
15528       }
15529
15530       // Simple case where 'CurrentVec' is UNDEF.
15531       if (CurrentVec.isUndef()) {
15532         Mask.push_back(-1);
15533         continue;
15534       }
15535
15536       // Canonicalize the shuffle index. We don't know yet if CurrentVec
15537       // will be the first or second operand of the combined shuffle.
15538       Idx = Idx % NumElts;
15539       if (!SV0.getNode() || SV0 == CurrentVec) {
15540         // Ok. CurrentVec is the left hand side.
15541         // Update the mask accordingly.
15542         SV0 = CurrentVec;
15543         Mask.push_back(Idx);
15544         continue;
15545       }
15546
15547       // Bail out if we cannot convert the shuffle pair into a single shuffle.
15548       if (SV1.getNode() && SV1 != CurrentVec)
15549         return SDValue();
15550
15551       // Ok. CurrentVec is the right hand side.
15552       // Update the mask accordingly.
15553       SV1 = CurrentVec;
15554       Mask.push_back(Idx + NumElts);
15555     }
15556
15557     // Check if all indices in Mask are Undef. In case, propagate Undef.
15558     bool isUndefMask = true;
15559     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
15560       isUndefMask &= Mask[i] < 0;
15561
15562     if (isUndefMask)
15563       return DAG.getUNDEF(VT);
15564
15565     if (!SV0.getNode())
15566       SV0 = DAG.getUNDEF(VT);
15567     if (!SV1.getNode())
15568       SV1 = DAG.getUNDEF(VT);
15569
15570     // Avoid introducing shuffles with illegal mask.
15571     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
15572       ShuffleVectorSDNode::commuteMask(Mask);
15573
15574       if (!TLI.isShuffleMaskLegal(Mask, VT))
15575         return SDValue();
15576
15577       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
15578       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
15579       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
15580       std::swap(SV0, SV1); // keep the operands in step with the commuted mask
15581     }
15582
15583     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15584     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15585     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15586     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
15587   }
15588
15589   return SDValue();
15590 }
15591
15592 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
15593   SDValue InVal = N->getOperand(0);
15594   EVT VT = N->getValueType(0);
15595
15596   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
15597   // with a VECTOR_SHUFFLE.
15598   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15599     SDValue InVec = InVal->getOperand(0);
15600     SDValue EltNo = InVal->getOperand(1);
15601
15602     // FIXME: We could support implicit truncation if the shuffle can be
15603     // scaled to a smaller vector scalar type.
15604     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
15605     if (C0 && VT == InVec.getValueType() &&
15606         VT.getScalarType() == InVal.getValueType()) {
15607       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
15608       int Elt = C0->getZExtValue();
15609       NewMask[0] = Elt;
15610
15611       if (TLI.isShuffleMaskLegal(NewMask, VT))
15612         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
15613                                     NewMask);
15614     }
15615   }
15616
15617   return SDValue();
15618 }
15619
15620 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
15621   EVT VT = N->getValueType(0);
15622   SDValue N0 = N->getOperand(0);
15623   SDValue N1 = N->getOperand(1);
15624   SDValue N2 = N->getOperand(2);
15625
15626   // If inserting an UNDEF, just return the original vector.
15627   if (N1.isUndef())
15628     return N0;
15629
15630   // If this is an insert of an extracted vector into an undef vector, we can
15631   // just use the input to the extract.
15632   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15633       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
15634     return N1.getOperand(0);
15635
15636   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
15637   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
15638   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
15639   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
15640       N0.getOperand(1).getValueType() == N1.getValueType() &&
15641       N0.getOperand(2) == N2)
15642     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
15643                        N1, N2);
15644
15645   if (!isa<ConstantSDNode>(N2))
15646     return SDValue();
15647
15648   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
15649
15650   // Canonicalize insert_subvector dag nodes.
15651   // Example:
15652   // (insert_subvector (insert_subvector A, Idx0), Idx1)
15653   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
15654   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
15655       N1.getValueType() == N0.getOperand(1).getValueType() &&
15656       isa<ConstantSDNode>(N0.getOperand(2))) {
15657     unsigned OtherIdx = N0.getConstantOperandVal(2);
15658     if (InsIdx < OtherIdx) {
15659       // Swap nodes.
15660       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
15661                                   N0.getOperand(0), N1, N2);
15662       AddToWorklist(NewOp.getNode());
15663       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
15664                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
15665     }
15666   }
15667
15668   // If the input vector is a concatenation, and the insert replaces
15669   // one of the pieces, we can optimize into a single concat_vectors.
15670   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
15671       N0.getOperand(0).getValueType() == N1.getValueType()) {
15672     unsigned Factor = N1.getValueType().getVectorNumElements();
15673
15674     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
15675     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
15676
15677     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15678   }
15679
15680   return SDValue();
15681 }
15682
15683 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
15684   SDValue Src = N->getOperand(0);
15685
15686   // Nothing to do unless the input comes straight from an FP16_TO_FP.
15687   if (Src->getOpcode() != ISD::FP16_TO_FP)
15688     return SDValue();
15689
15690   return Src->getOperand(0); // (fp_to_fp16 (fp16_to_fp op)) -> op
15691 }
15692
15693 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
15694   SDValue N0 = N->getOperand(0);
15695
15696   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
15697   if (N0->getOpcode() == ISD::AND) {
15698     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
15699     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
15700       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
15701                          N0.getOperand(0));
15702     }
15703   }
15704
15705   return SDValue();
15706 }
15707
15708 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
15709 /// with the destination vector and a zero vector.
15710 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
15711 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
15712 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
15713   EVT VT = N->getValueType(0);
15714   SDValue LHS = N->getOperand(0);
15715   SDValue RHS = N->getOperand(1);
15716   SDLoc DL(N);
15717
15718   // Make sure we're not running after operation legalization where it
15719   // may have custom lowered the vector shuffles.
15720   if (LegalOperations)
15721     return SDValue();
15722
15723   if (N->getOpcode() != ISD::AND)
15724     return SDValue();
15725
15726   if (RHS.getOpcode() == ISD::BITCAST)
15727     RHS = RHS.getOperand(0);
15728
15729   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
15730     return SDValue();
15731
15732   EVT RVT = RHS.getValueType();
15733   unsigned NumElts = RHS.getNumOperands();
15734
15735   // Attempt to create a valid clear mask, splitting the mask into
15736   // sub elements and checking to see if each is
15737   // all zeros or all ones - suitable for shuffle masking.
15738   auto BuildClearMask = [&](int Split) {
15739     int NumSubElts = NumElts * Split;
15740     int NumSubBits = RVT.getScalarSizeInBits() / Split;
15741
15742     SmallVector<int, 8> Indices;
15743     for (int i = 0; i != NumSubElts; ++i) {
15744       int EltIdx = i / Split;
15745       int SubIdx = i % Split;
15746       SDValue Elt = RHS.getOperand(EltIdx);
15747       if (Elt.isUndef()) {
15748         Indices.push_back(-1);
15749         continue;
15750       }
15751
15752       APInt Bits;
15753       if (isa<ConstantSDNode>(Elt))
15754         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
15755       else if (isa<ConstantFPSDNode>(Elt))
15756         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
15757       else
15758         return SDValue();
15759
15760       // Extract the sub element from the constant bit mask.
15761       if (DAG.getDataLayout().isBigEndian()) {
15762         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
15763       } else {
15764         Bits.lshrInPlace(SubIdx * NumSubBits);
15765       }
15766
15767       if (Split > 1)
15768         Bits = Bits.trunc(NumSubBits);
15769
15770       if (Bits.isAllOnesValue())
15771         Indices.push_back(i);
15772       else if (Bits == 0)
15773         Indices.push_back(i + NumSubElts);
15774       else
15775         return SDValue();
15776     }
15777
15778     // Let's see if the target supports this vector_shuffle.
15779     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
15780     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
15781     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
15782       return SDValue();
15783
15784     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
15785     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
15786                                                    DAG.getBitcast(ClearVT, LHS),
15787                                                    Zero, Indices));
15788   };
15789
15790   // Determine maximum split level (byte level masking).
15791   int MaxSplit = 1;
15792   if (RVT.getScalarSizeInBits() % 8 == 0)
15793     MaxSplit = RVT.getScalarSizeInBits() / 8;
15794
15795   for (int Split = 1; Split <= MaxSplit; ++Split)
15796     if (RVT.getScalarSizeInBits() % Split == 0)
15797       if (SDValue S = BuildClearMask(Split))
15798         return S;
15799
15800   return SDValue();
15801 }
15802
15803 /// Visit a binary vector operation, like ADD.
15804 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
15805   assert(N->getValueType(0).isVector() &&
15806          "SimplifyVBinOp only works on vectors!");
15807
15808   SDValue LHS = N->getOperand(0);
15809   SDValue RHS = N->getOperand(1);
15810   SDValue Ops[] = {LHS, RHS};
15811
15812   // See if we can constant fold the vector operation.
15813   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
15814           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
15815     return Fold;
15816
15817   // Try to convert a constant mask AND into a shuffle clear mask.
15818   if (SDValue Shuffle = XformToShuffleWithZero(N))
15819     return Shuffle;
15820
15821   // Type legalization might introduce new shuffles in the DAG.
15822   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
15823   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
15824   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
15825       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
15826       LHS.getOperand(1).isUndef() &&
15827       RHS.getOperand(1).isUndef()) {
15828     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
15829     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
15830
15831     if (SVN0->getMask().equals(SVN1->getMask())) {
15832       EVT VT = N->getValueType(0);
15833       SDValue UndefVector = LHS.getOperand(1);
15834       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
15835                                      LHS.getOperand(0), RHS.getOperand(0),
15836                                      N->getFlags());
15837       AddUsersToWorklist(N);
15838       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
15839                                   SVN0->getMask());
15840     }
15841   }
15842
15843   return SDValue();
15844 }
15845
15846 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
15847                                     SDValue N2) {
15848   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
15849
15850   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
15851                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15852
15853   // If we got a simplified select_cc node back from SimplifySelectCC, then
15854   // break it down into a new SETCC node, and a new SELECT node, and then return
15855   // the SELECT node, since we were called with a SELECT node.
15856   if (SCC.getNode()) {
15857     // Check to see if we got a select_cc back (to turn into setcc/select).
15858     // Otherwise, just return whatever node we got back, like fabs.
15859     if (SCC.getOpcode() == ISD::SELECT_CC) {
15860       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
15861                                   N0.getValueType(),
15862                                   SCC.getOperand(0), SCC.getOperand(1),
15863                                   SCC.getOperand(4));
15864       AddToWorklist(SETCC.getNode());
15865       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
15866                            SCC.getOperand(2), SCC.getOperand(3));
15867     }
15868
15869     return SCC;
15870   }
15871   return SDValue();
15872 }
15873
15874 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
15875 /// being selected between, see if we can simplify the select.  Callers of this
15876 /// should assume that TheSelect is deleted if this returns true.  As such, they
15877 /// should return the appropriate thing (e.g. the node) back to the top-level of
15878 /// the DAG combiner loop to avoid it being looked at.
15879 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
15880                                     SDValue RHS) {
15881
15882   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15883   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
15884   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
15885     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
15886       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
15887       SDValue Sqrt = RHS;
15888       ISD::CondCode CC;
15889       SDValue CmpLHS;
15890       const ConstantFPSDNode *Zero = nullptr;
15891
15892       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
15893         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
15894         CmpLHS = TheSelect->getOperand(0);
15895         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
15896       } else {
15897         // SELECT or VSELECT
15898         SDValue Cmp = TheSelect->getOperand(0);
15899         if (Cmp.getOpcode() == ISD::SETCC) {
15900           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
15901           CmpLHS = Cmp.getOperand(0);
15902           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
15903         }
15904       }
15905       if (Zero && Zero->isZero() &&
15906           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
15907           CC == ISD::SETULT || CC == ISD::SETLT)) {
15908         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15909         CombineTo(TheSelect, Sqrt);
15910         return true;
15911       }
15912     }
15913   }
15914   // Cannot simplify select with vector condition
15915   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
15916
15917   // If this is a select from two identical things, try to pull the operation
15918   // through the select.
15919   if (LHS.getOpcode() != RHS.getOpcode() ||
15920       !LHS.hasOneUse() || !RHS.hasOneUse())
15921     return false;
15922
15923   // If this is a load and the token chain is identical, replace the select
15924   // of two loads with a load through a select of the address to load from.
15925   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
15926   // constants have been dropped into the constant pool.
15927   if (LHS.getOpcode() == ISD::LOAD) {
15928     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
15929     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
15930
15931     // Token chains must be identical.
15932     if (LHS.getOperand(0) != RHS.getOperand(0) ||
15933         // Do not let this transformation reduce the number of volatile loads.
15934         LLD->isVolatile() || RLD->isVolatile() ||
15935         // FIXME: If either is a pre/post inc/dec load,
15936         // we'd need to split out the address adjustment.
15937         LLD->isIndexed() || RLD->isIndexed() ||
15938         // If this is an EXTLOAD, the VT's must match.
15939         LLD->getMemoryVT() != RLD->getMemoryVT() ||
15940         // If this is an EXTLOAD, the kind of extension must match.
15941         (LLD->getExtensionType() != RLD->getExtensionType() &&
15942          // The only exception is if one of the extensions is anyext.
15943          LLD->getExtensionType() != ISD::EXTLOAD &&
15944          RLD->getExtensionType() != ISD::EXTLOAD) ||
15945         // FIXME: this discards src value information.  This is
15946         // over-conservative. It would be beneficial to be able to remember
15947         // both potential memory locations.  Since we are discarding
15948         // src value info, don't do the transformation if the memory
15949         // locations are not in the default address space.
15950         LLD->getPointerInfo().getAddrSpace() != 0 ||
15951         RLD->getPointerInfo().getAddrSpace() != 0 ||
15952         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
15953                                       LLD->getBasePtr().getValueType()))
15954       return false;
15955
15956     // Check that the select condition doesn't reach either load.  If so,
15957     // folding this will induce a cycle into the DAG.  If not, this is safe to
15958     // xform, so create a select of the addresses.
15959     SDValue Addr;
15960     if (TheSelect->getOpcode() == ISD::SELECT) {
15961       SDNode *CondNode = TheSelect->getOperand(0).getNode();
15962       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
15963           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
15964         return false;
15965       // The loads must not depend on one another.
15966       if (LLD->isPredecessorOf(RLD) ||
15967           RLD->isPredecessorOf(LLD))
15968         return false;
15969       Addr = DAG.getSelect(SDLoc(TheSelect),
15970                            LLD->getBasePtr().getValueType(),
15971                            TheSelect->getOperand(0), LLD->getBasePtr(),
15972                            RLD->getBasePtr());
15973     } else {  // Otherwise SELECT_CC
15974       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
15975       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
15976
15977       if ((LLD->hasAnyUseOfValue(1) &&
15978            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
15979           (RLD->hasAnyUseOfValue(1) &&
15980            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
15981         return false;
15982
15983       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
15984                          LLD->getBasePtr().getValueType(),
15985                          TheSelect->getOperand(0),
15986                          TheSelect->getOperand(1),
15987                          LLD->getBasePtr(), RLD->getBasePtr(),
15988                          TheSelect->getOperand(4));
15989     }
15990
15991     SDValue Load;
15992     // It is safe to replace the two loads if they have different alignments,
15993     // but the new load must be the minimum (most restrictive) alignment of the
15994     // inputs.
15995     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
15996     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
15997     if (!RLD->isInvariant())
15998       MMOFlags &= ~MachineMemOperand::MOInvariant;
15999     if (!RLD->isDereferenceable())
16000       MMOFlags &= ~MachineMemOperand::MODereferenceable;
16001     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16002       // FIXME: Discards pointer and AA info.
16003       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16004                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16005                          MMOFlags);
16006     } else {
16007       // FIXME: Discards pointer and AA info.
16008       Load = DAG.getExtLoad(
16009           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16010                                                   : LLD->getExtensionType(),
16011           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16012           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16013     }
16014
16015     // Users of the select now use the result of the load.
16016     CombineTo(TheSelect, Load);
16017
16018     // Users of the old loads now use the new load's chain.  We know the
16019     // old-load value is dead now.
16020     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16021     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16022     return true;
16023   }
16024
16025   return false;
16026 }
16027
16028 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16029 /// bitwise 'and'.
16030 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16031                                             SDValue N1, SDValue N2, SDValue N3,
16032                                             ISD::CondCode CC) {
16033   // If this is a select where the false operand is zero and the compare is a
16034   // check of the sign bit, see if we can perform the "gzip trick":
16035   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16036   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16037   EVT XType = N0.getValueType();
16038   EVT AType = N2.getValueType();
16039   if (!isNullConstant(N3) || !XType.bitsGE(AType))
16040     return SDValue();
16041
16042   // If the comparison is testing for a positive value, we have to invert
16043   // the sign bit mask, so only do that transform if the target has a bitwise
16044   // 'and not' instruction (the invert is free).
16045   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16046     // (X > -1) ? A : 0
16047     // (X >  0) ? X : 0 <-- This is canonical signed max.
16048     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16049       return SDValue();
16050   } else if (CC == ISD::SETLT) {
16051     // (X <  0) ? A : 0
16052     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
16053     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16054       return SDValue();
16055   } else {
16056     return SDValue();
16057   }
16058
16059   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16060   // constant.
16061   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16062   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16063   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16064     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16065     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16066     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16067     AddToWorklist(Shift.getNode());
16068
16069     if (XType.bitsGT(AType)) {
16070       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16071       AddToWorklist(Shift.getNode());
16072     }
16073
16074     if (CC == ISD::SETGT)
16075       Shift = DAG.getNOT(DL, Shift, AType);
16076
16077     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16078   }
16079
16080   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16081   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16082   AddToWorklist(Shift.getNode());
16083
16084   if (XType.bitsGT(AType)) {
16085     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16086     AddToWorklist(Shift.getNode());
16087   }
16088
16089   if (CC == ISD::SETGT)
16090     Shift = DAG.getNOT(DL, Shift, AType);
16091
16092   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16093 }
16094
16095 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
16096 /// where 'cond' is the comparison specified by CC.
      ///
      /// The folds below are attempted in order. Returns the simplified value, or
      /// an empty SDValue if no fold applied. When \p NotExtCompare is set, the
      /// "select C, 1, 0" form is not turned into a zero-extended setcc.
16097 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
16098                                       SDValue N2, SDValue N3, ISD::CondCode CC,
16099                                       bool NotExtCompare) {
16100   // (x ? y : y) -> y.
16101   if (N2 == N3) return N2;
16102
16103   EVT VT = N2.getValueType();
16104   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
16105   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16106
16107   // Determine if the condition we're dealing with is constant.
16108   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
16109                               N0, N1, CC, DL, false);
16110   if (SCC.getNode()) AddToWorklist(SCC.getNode());
16111
16112   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
16113     // fold select_cc true, x, y -> x
16114     // fold select_cc false, x, y -> y
16115     return !SCCC->isNullValue() ? N2 : N3;
16116   }
16117
16118   // Check to see if we can simplify the select into an fabs node.
16119   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
16120     // Allow either -0.0 or 0.0
16121     if (CFP->isZero()) {
16122       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
16123       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
16124           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
16125           N2 == N3.getOperand(0))
16126         return DAG.getNode(ISD::FABS, DL, VT, N0);
16127
16128       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
16129       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
16130           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
16131           N2.getOperand(0) == N3)
16132         return DAG.getNode(ISD::FABS, DL, VT, N3);
16133     }
16134   }
16135
16136   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
16137   // where "tmp" is a constant pool entry containing an array with 2.0 and 1.0
16138   // in it.  This is a win when the constant is not otherwise available because
16139   // it replaces two constant pool loads with one.  We only do this if the FP
16140   // type is known to be legal, because if it isn't, then we are before legalize
16141   // types and we want the other legalization to happen first (e.g. to avoid
16142   // messing with soft float) and if the ConstantFP is not legal, because if
16143   // it is legal, we may not need to store the FP constant in a constant pool.
16144   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
16145     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
16146       if (TLI.isTypeLegal(N2.getValueType()) &&
16147           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
16148                TargetLowering::Legal &&
16149            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
16150            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
16151           // If both constants have multiple uses, then we won't need to do an
16152           // extra load, they are likely around in registers for other users.
16153           (TV->hasOneUse() || FV->hasOneUse())) {
              // Element 0 holds the false value and element 1 the true value.
16154         Constant *Elts[] = {
16155           const_cast<ConstantFP*>(FV->getConstantFPValue()),
16156           const_cast<ConstantFP*>(TV->getConstantFPValue())
16157         };
16158         Type *FPTy = Elts[0]->getType();
16159         const DataLayout &TD = DAG.getDataLayout();
16160
16161         // Create a ConstantArray of the two constants.
16162         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
16163         SDValue CPIdx =
16164             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
16165                                 TD.getPrefTypeAlignment(FPTy));
16166         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
16167
16168         // Get the offsets to the 0 and 1 element of the array so that we can
16169         // select between them.
16170         SDValue Zero = DAG.getIntPtrConstant(0, DL);
16171         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
16172         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
16173
16174         SDValue Cond = DAG.getSetCC(DL,
16175                                     getSetCCResultType(N0.getValueType()),
16176                                     N0, N1, CC);
16177         AddToWorklist(Cond.getNode());
              // A true condition picks the offset of element 1 (EltSize); a false
              // condition picks offset 0.
16178         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
16179                                           Cond, One, Zero);
16180         AddToWorklist(CstOffset.getNode());
16181         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
16182                             CstOffset);
16183         AddToWorklist(CPIdx.getNode());
16184         return DAG.getLoad(
16185             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
16186             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
16187             Alignment);
16188       }
16189     }
16190
16191   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
16192     return V;
16193
16194   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
16195   // where y has a single bit set.
16196   // A plaintext description would be, we can turn the SELECT_CC into an AND
16197   // when the condition can be materialized as an all-ones register.  Any
16198   // single bit-test can be materialized as an all-ones register with
16199   // shift-left and shift-right-arith.
16200   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
16201       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
16202     SDValue AndLHS = N0->getOperand(0);
16203     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16204     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
16205       // Shift the tested bit over the sign bit.
16206       const APInt &AndMask = ConstAndRHS->getAPIntValue();
16207       SDValue ShlAmt =
16208         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
16209                         getShiftAmountTy(AndLHS.getValueType()));
16210       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
16211
16212       // Now arithmetic right shift it all the way over, so the result is either
16213       // all-ones, or zero.
16214       SDValue ShrAmt =
16215         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
16216                         getShiftAmountTy(Shl.getValueType()));
16217       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
16218
16219       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
16220     }
16221   }
16222
16223   // fold select C, 16, 0 -> shl C, 4
16224   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
16225       TLI.getBooleanContents(N0.getValueType()) ==
16226           TargetLowering::ZeroOrOneBooleanContent) {
16227
16228     // If the caller doesn't want us to simplify this into a zext of a compare,
16229     // don't do it.
16230     if (NotExtCompare && N2C->isOne())
16231       return SDValue();
16232
16233     // Get a SetCC of the condition
16234     // NOTE: Don't create a SETCC if it's not legal on this target.
16235     if (!LegalOperations ||
16236         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
16237       SDValue Temp, SCC;
16238       // cast from setcc result type to select result type
16239       if (LegalTypes) {
16240         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
16241                             N0, N1, CC);
16242         if (N2.getValueType().bitsLT(SCC.getValueType()))
16243           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
16244                                         N2.getValueType());
16245         else
16246           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16247                              N2.getValueType(), SCC);
16248       } else {
16249         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
16250         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16251                            N2.getValueType(), SCC);
16252       }
16253
16254       AddToWorklist(SCC.getNode());
16255       AddToWorklist(Temp.getNode());
16256
16257       if (N2C->isOne())
16258         return Temp;
16259
16260       // shl setcc result by log2 n2c
16261       return DAG.getNode(
16262           ISD::SHL, DL, N2.getValueType(), Temp,
16263           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
16264                           getShiftAmountTy(Temp.getValueType())));
16265     }
16266   }
16267
16268   // Check to see if this is an integer abs.
16269   // select_cc setg[te] X,  0,  X, -X ->
16270   // select_cc setgt    X, -1,  X, -X ->
16271   // select_cc setl[te] X,  0, -X,  X ->
16272   // select_cc setlt    X,  1, -X,  X ->
16273   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
16274   if (N1C) {
16275     ConstantSDNode *SubC = nullptr;
16276     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
16277          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
16278         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
16279       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
16280     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
16281               (N1C->isOne() && CC == ISD::SETLT)) &&
16282              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
16283       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
16284
16285     EVT XType = N0.getValueType();
16286     if (SubC && SubC->isNullValue() && XType.isInteger()) {
            // NOTE: this DL shadows the function's DL parameter.
16287       SDLoc DL(N0);
16288       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
16289                                   N0,
16290                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
16291                                          getShiftAmountTy(N0.getValueType())));
16292       SDValue Add = DAG.getNode(ISD::ADD, DL,
16293                                 XType, N0, Shift);
16294       AddToWorklist(Shift.getNode());
16295       AddToWorklist(Add.getNode());
16296       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
16297     }
16298   }
16299
16300   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
16301   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
16302   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
16303   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
16304   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
16305   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
16306   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
16307   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
        // (sizeof(X) above stands for the size in bits of the result type.)
16308   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
16309     SDValue ValueOnZero = N2;
16310     SDValue Count = N3;
16311     // If the condition is NE instead of E, swap the operands.
16312     if (CC == ISD::SETNE)
16313       std::swap(ValueOnZero, Count);
16314     // Check if the value on zero is a constant equal to the bits in the type.
16315     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
16316       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
16317         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
16318         // legal, combine to just cttz.
16319         if ((Count.getOpcode() == ISD::CTTZ ||
16320              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
16321             N0 == Count.getOperand(0) &&
16322             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
16323           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
16324         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
16325         // legal, combine to just ctlz.
16326         if ((Count.getOpcode() == ISD::CTLZ ||
16327              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
16328             N0 == Count.getOperand(0) &&
16329             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
16330           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
16331       }
16332     }
16333   }
16334
16335   return SDValue();
16336 }
16337
16338 /// This is a stub for TargetLowering::SimplifySetCC.
16339 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
16340                                    ISD::CondCode Cond, const SDLoc &DL,
16341                                    bool foldBooleans) {
16342   TargetLowering::DAGCombinerInfo
16343     DagCombineInfo(DAG, Level, false, this);
16344   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
16345 }
16346
16347 /// Expand an ISD::SDIV node whose divisor is a constant (or a splat of one)
16348 /// into a DAG expression that produces the same value by multiplying by a
16349 /// magic number. Returns an empty SDValue if the preconditions below fail.
16350 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16351 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
16352   // When optimising for minimum size, keep the div rather than expanding it
16353   // to a mul and a shift.
16354   const Function *Fn = DAG.getMachineFunction().getFunction();
16355   if (Fn->optForMinSize())
16356     return SDValue();
16357
16358   // The divisor must be a constant, and it must not be zero.
16359   ConstantSDNode *Divisor = isConstOrConstSplat(N->getOperand(1));
16360   if (!Divisor || Divisor->isNullValue())
16361     return SDValue();
16362
16363   // Created is handed to the target hook, which builds the sequence.
16364   std::vector<SDNode *> Created;
16365   SDValue Result = TLI.BuildSDIV(N, Divisor->getAPIntValue(), DAG,
16366                                  LegalOperations, &Created);
16367
16368   // Hand the new nodes to the combiner worklist.
16369   for (SDNode *NewNode : Created)
16370     AddToWorklist(NewNode);
16371   return Result;
16372 }
16373
16374 /// Expand an ISD::SDIV node whose divisor is a constant power of 2 into a DAG
16375 /// expression that generates the same value by right shifting.
16376 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
16377   // The divisor must be a constant (or constant splat) other than zero.
16378   ConstantSDNode *Divisor = isConstOrConstSplat(N->getOperand(1));
16379   if (!Divisor || Divisor->isNullValue())
16380     return SDValue();
16381
16382   // Created is handed to the target hook, which builds the sequence.
16383   std::vector<SDNode *> Created;
16384   SDValue Result =
16385       TLI.BuildSDIVPow2(N, Divisor->getAPIntValue(), DAG, &Created);
16386
16387   // Hand the new nodes to the combiner worklist.
16388   for (SDNode *NewNode : Created)
16389     AddToWorklist(NewNode);
16390   return Result;
16391 }
16392
16393 /// Expand an ISD::UDIV node whose divisor is a constant (or a splat of one)
16394 /// into a DAG expression that produces the same value by multiplying by a
16395 /// magic number. Returns an empty SDValue if the preconditions below fail.
16396 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16397 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
16398   // When optimising for minimum size, keep the div rather than expanding it
16399   // to a mul and a shift.
16400   const Function *Fn = DAG.getMachineFunction().getFunction();
16401   if (Fn->optForMinSize())
16402     return SDValue();
16403
16404   // The divisor must be a constant, and it must not be zero.
16405   ConstantSDNode *Divisor = isConstOrConstSplat(N->getOperand(1));
16406   if (!Divisor || Divisor->isNullValue())
16407     return SDValue();
16408
16409   // Created is handed to the target hook, which builds the sequence.
16410   std::vector<SDNode *> Created;
16411   SDValue Result = TLI.BuildUDIV(N, Divisor->getAPIntValue(), DAG,
16412                                  LegalOperations, &Created);
16413
16414   // Hand the new nodes to the combiner worklist.
16415   for (SDNode *NewNode : Created)
16416     AddToWorklist(NewNode);
16417   return Result;
16418 }
16419
16420 /// Emit nodes computing the LogBase2 of \p V, which must be non-null, as
16421 /// (EltBits - 1) - ctlz(V), where EltBits is the scalar width of V's type.
16422 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
16423   EVT OpVT = V.getValueType();
16424
16425   // Create the ctlz first, then the (EltBits - 1) constant it is taken from.
16426   SDValue LeadingZeros = DAG.getNode(ISD::CTLZ, DL, OpVT, V);
16427   SDValue TopBit = DAG.getConstant(OpVT.getScalarSizeInBits() - 1, DL, OpVT);
16428   return DAG.getNode(ISD::SUB, DL, OpVT, TopBit, LeadingZeros);
16429 }
16430
16431 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16432 /// For the reciprocal, we need to find the zero of the function:
16433 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
16434 ///     =>
16435 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
16436 ///     does not require additional intermediate precision]
16437 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
16438   if (Level >= AfterLegalizeDAG)
16439     return SDValue();
16440
16441   // TODO: Handle half and/or extended types?
16442   EVT VT = Op.getValueType();
16443   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16444     return SDValue();
16445
16446   // If estimates are explicitly disabled for this function, we're done.
16447   MachineFunction &MF = DAG.getMachineFunction();
16448   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
16449   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16450     return SDValue();
16451
16452   // Estimates may be explicitly enabled for this type with a custom number of
16453   // refinement steps.
16454   int Iterations = TLI.getDivRefinementSteps(VT, MF);
16455   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
16456     AddToWorklist(Est.getNode());
16457
16458     if (Iterations) {
16459       EVT VT = Op.getValueType();
16460       SDLoc DL(Op);
16461       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16462
16463       // Newton iterations: Est = Est + Est (1 - Arg * Est)
16464       for (int i = 0; i < Iterations; ++i) {
16465         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
16466         AddToWorklist(NewEst.getNode());
16467
16468         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
16469         AddToWorklist(NewEst.getNode());
16470
16471         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16472         AddToWorklist(NewEst.getNode());
16473
16474         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
16475         AddToWorklist(Est.getNode());
16476       }
16477     }
16478     return Est;
16479   }
16480
16481   return SDValue();
16482 }
16483
16484 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16485 /// For the reciprocal sqrt, we need to find the zero of the function:
16486 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16487 ///     =>
16488 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
16489 /// As a result, we precompute A/2 prior to the iteration loop.
16490 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
16491                                          unsigned Iterations,
16492                                          SDNodeFlags Flags, bool Reciprocal) {
16493   EVT VT = Arg.getValueType();
16494   SDLoc DL(Arg);
16495   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
16496
16497   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
16498   // this entire sequence requires only one FP constant.
16499   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
16500   AddToWorklist(HalfArg.getNode());
16501
16502   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
16503   AddToWorklist(HalfArg.getNode());
16504
16505   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
16506   for (unsigned i = 0; i < Iterations; ++i) {
16507     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
16508     AddToWorklist(NewEst.getNode());
16509
16510     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
16511     AddToWorklist(NewEst.getNode());
16512
16513     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
16514     AddToWorklist(NewEst.getNode());
16515
16516     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16517     AddToWorklist(Est.getNode());
16518   }
16519
16520   // If non-reciprocal square root is requested, multiply the result by Arg.
16521   if (!Reciprocal) {
16522     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
16523     AddToWorklist(Est.getNode());
16524   }
16525
16526   return Est;
16527 }
16528
16529 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16530 /// For the reciprocal sqrt, we need to find the zero of the function:
16531 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16532 ///     =>
16533 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
16534 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
16535                                          unsigned Iterations,
16536                                          SDNodeFlags Flags, bool Reciprocal) {
16537   EVT VT = Arg.getValueType();
16538   SDLoc DL(Arg);
16539   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
16540   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
16541
16542   // This routine must enter the loop below to work correctly
16543   // when (Reciprocal == false).
16544   assert(Iterations > 0);
16545
16546   // Newton iterations for reciprocal square root:
16547   // E = (E * -0.5) * ((A * E) * E + -3.0)
16548   for (unsigned i = 0; i < Iterations; ++i) {
16549     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
16550     AddToWorklist(AE.getNode());
16551
16552     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
16553     AddToWorklist(AEE.getNode());
16554
16555     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
16556     AddToWorklist(RHS.getNode());
16557
16558     // When calculating a square root at the last iteration build:
16559     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
16560     // (notice a common subexpression)
16561     SDValue LHS;
16562     if (Reciprocal || (i + 1) < Iterations) {
16563       // RSQRT: LHS = (E * -0.5)
16564       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
16565     } else {
16566       // SQRT: LHS = (A * E) * -0.5
16567       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
16568     }
16569     AddToWorklist(LHS.getNode());
16570
16571     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
16572     AddToWorklist(Est.getNode());
16573   }
16574
16575   return Est;
16576 }
16577
16578 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
16579 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
16580 /// Op can be zero.
16581 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
16582                                            bool Reciprocal) {
16583   if (Level >= AfterLegalizeDAG)
16584     return SDValue();
16585
16586   // TODO: Handle half and/or extended types?
16587   EVT VT = Op.getValueType();
16588   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16589     return SDValue();
16590
16591   // If estimates are explicitly disabled for this function, we're done.
16592   MachineFunction &MF = DAG.getMachineFunction();
16593   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
16594   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16595     return SDValue();
16596
16597   // Estimates may be explicitly enabled for this type with a custom number of
16598   // refinement steps.
16599   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
16600
16601   bool UseOneConstNR = false;
16602   if (SDValue Est =
16603       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
16604                           Reciprocal)) {
16605     AddToWorklist(Est.getNode());
16606
16607     if (Iterations) {
16608       Est = UseOneConstNR
16609             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
16610             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
16611
16612       if (!Reciprocal) {
16613         // Unfortunately, Est is now NaN if the input was exactly 0.0.
16614         // Select out this case and force the answer to 0.0.
16615         EVT VT = Op.getValueType();
16616         SDLoc DL(Op);
16617
16618         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
16619         EVT CCVT = getSetCCResultType(VT);
16620         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
16621         AddToWorklist(ZeroCmp.getNode());
16622
16623         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
16624                           ZeroCmp, FPZero, Est);
16625         AddToWorklist(Est.getNode());
16626       }
16627     }
16628     return Est;
16629   }
16630
16631   return SDValue();
16632 }
16633
16634 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16635   return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/true); // 1/sqrt(Op)
16636 }
16637
16638 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16639   return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/false); // sqrt(Op)
16640 }
16641
16642 /// Return true if base is a frame index, which is known not to alias with
16643 /// anything but itself.  Provides base object and offset as results.
16644 static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
16645                            const GlobalValue *&GV, const void *&CV) {
16646   // Assume it is a primitive operation.
16647   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
16648
16649   // If it's an adding a simple constant then integrate the offset.
16650   if (Base.getOpcode() == ISD::ADD) {
16651     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
16652       Base = Base.getOperand(0);
16653       Offset += C->getSExtValue();
16654     }
16655   }
16656
16657   // Return the underlying GlobalValue, and update the Offset.  Return false
16658   // for GlobalAddressSDNode since the same GlobalAddress may be represented
16659   // by multiple nodes with different offsets.
16660   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
16661     GV = G->getGlobal();
16662     Offset += G->getOffset();
16663     return false;
16664   }
16665
16666   // Return the underlying Constant value, and update the Offset.  Return false
16667   // for ConstantSDNodes since the same constant pool entry may be represented
16668   // by multiple nodes with different offsets.
16669   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
16670     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
16671                                          : (const void *)C->getConstVal();
16672     Offset += C->getOffset();
16673     return false;
16674   }
16675   // If it's any of the following then it can't alias with anything but itself.
16676   return isa<FrameIndexSDNode>(Base);
16677 }
16678
16679 /// Return true if there is any possibility that the two addresses overlap.
16680 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
16681   // If they are the same then they must be aliases.
16682   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
16683
16684   // If they are both volatile then they cannot be reordered.
16685   if (Op0->isVolatile() && Op1->isVolatile()) return true;
16686
16687   // If one operation reads from invariant memory, and the other may store, they
16688   // cannot alias. These should really be checking the equivalent of mayWrite,
16689   // but it only matters for memory nodes other than load /store.
16690   if (Op0->isInvariant() && Op1->writeMem())
16691     return false;
16692
16693   if (Op1->isInvariant() && Op0->writeMem())
16694     return false;
16695
16696   unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
16697   unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
16698
16699   // Check for BaseIndexOffset matching.
16700   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
16701   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
16702   int64_t PtrDiff;
16703   if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
16704     return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
16705
16706   // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
16707   // modified to use BaseIndexOffset.
16708
16709   // Gather base node and offset information.
16710   SDValue Base0, Base1;
16711   int64_t Offset0, Offset1;
16712   const GlobalValue *GV0, *GV1;
16713   const void *CV0, *CV1;
16714   bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
16715                                       Base0, Offset0, GV0, CV0);
16716   bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
16717                                       Base1, Offset1, GV1, CV1);
16718
16719   // If they have the same base address, then check to see if they overlap.
16720   if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
16721     return !((Offset0 + NumBytes0) <= Offset1 ||
16722              (Offset1 + NumBytes1) <= Offset0);
16723
16724   // It is possible for different frame indices to alias each other, mostly
16725   // when tail call optimization reuses return address slots for arguments.
16726   // To catch this case, look up the actual index of frame indices to compute
16727   // the real alias relationship.
16728   if (IsFrameIndex0 && IsFrameIndex1) {
16729     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16730     Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
16731     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
16732     return !((Offset0 + NumBytes0) <= Offset1 ||
16733              (Offset1 + NumBytes1) <= Offset0);
16734   }
16735
16736   // Otherwise, if we know what the bases are, and they aren't identical, then
16737   // we know they cannot alias.
16738   if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
16739     return false;
16740
16741   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
16742   // compared to the size and offset of the access, we may be able to prove they
16743   // do not alias. This check is conservative for now to catch cases created by
16744   // splitting vector types.
16745   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
16746   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
16747   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
16748   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
16749   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
16750       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
16751     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
16752     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
16753
16754     // There is no overlap between these relatively aligned accesses of similar
16755     // size. Return no alias.
16756     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
16757         (OffAlign1 + NumBytes1) <= OffAlign0)
16758       return false;
16759   }
16760
16761   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
16762                    ? CombinerGlobalAA
16763                    : DAG.getSubtarget().useAA();
16764 #ifndef NDEBUG
16765   if (CombinerAAOnlyFunc.getNumOccurrences() &&
16766       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
16767     UseAA = false;
16768 #endif
16769
16770   if (UseAA && AA &&
16771       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
16772     // Use alias analysis information.
16773     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
16774     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
16775     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
16776     AliasResult AAResult =
16777         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
16778                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
16779                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
16780                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
16781     if (AAResult == NoAlias)
16782       return false;
16783   }
16784
16785   // Otherwise we have to assume they alias.
16786   return true;
16787 }
16788
16789 /// Walk up chain skipping non-aliasing memory nodes,
16790 /// looking for aliasing nodes and adding them to the Aliases vector.
16791 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
16792                                    SmallVectorImpl<SDValue> &Aliases) {
16793   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
16794   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
16795
16796   // Get alias information for node.
16797   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
16798
16799   // Starting off.
16800   Chains.push_back(OriginalChain);
16801   unsigned Depth = 0;
16802
16803   // Look at each chain and determine if it is an alias.  If so, add it to the
16804   // aliases list.  If not, then continue up the chain looking for the next
16805   // candidate.
16806   while (!Chains.empty()) {
16807     SDValue Chain = Chains.pop_back_val();
16808
16809     // For TokenFactor nodes, look at each operand and only continue up the
16810     // chain until we reach the depth limit.
16811     //
16812     // FIXME: The depth check could be made to return the last non-aliasing
16813     // chain we found before we hit a tokenfactor rather than the original
16814     // chain.
16815     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
16816       Aliases.clear();
16817       Aliases.push_back(OriginalChain);
16818       return;
16819     }
16820
16821     // Don't bother if we've been before.
16822     if (!Visited.insert(Chain.getNode()).second)
16823       continue;
16824
16825     switch (Chain.getOpcode()) {
16826     case ISD::EntryToken:
16827       // Entry token is ideal chain operand, but handled in FindBetterChain.
16828       break;
16829
16830     case ISD::LOAD:
16831     case ISD::STORE: {
16832       // Get alias information for Chain.
16833       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
16834           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
16835
16836       // If chain is alias then stop here.
16837       if (!(IsLoad && IsOpLoad) &&
16838           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
16839         Aliases.push_back(Chain);
16840       } else {
16841         // Look further up the chain.
16842         Chains.push_back(Chain.getOperand(0));
16843         ++Depth;
16844       }
16845       break;
16846     }
16847
16848     case ISD::TokenFactor:
16849       // We have to check each of the operands of the token factor for "small"
16850       // token factors, so we queue them up.  Adding the operands to the queue
16851       // (stack) in reverse order maintains the original order and increases the
16852       // likelihood that getNode will find a matching token factor (CSE.)
16853       if (Chain.getNumOperands() > 16) {
16854         Aliases.push_back(Chain);
16855         break;
16856       }
16857       for (unsigned n = Chain.getNumOperands(); n;)
16858         Chains.push_back(Chain.getOperand(--n));
16859       ++Depth;
16860       break;
16861
16862     case ISD::CopyFromReg:
16863       // Forward past CopyFromReg.
16864       Chains.push_back(Chain.getOperand(0));
16865       ++Depth;
16866       break;
16867
16868     default:
16869       // For all other instructions we will just have to take what we can get.
16870       Aliases.push_back(Chain);
16871       break;
16872     }
16873   }
16874 }
16875
16876 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
16877 /// (aliasing node.)
16878 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
16879   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
16880
16881   // Accumulate all the aliases to this node.
16882   GatherAllAliases(N, OldChain, Aliases);
16883
16884   // If no operands then chain to entry token.
16885   if (Aliases.size() == 0)
16886     return DAG.getEntryNode();
16887
16888   // If a single operand then chain to it.  We don't need to revisit it.
16889   if (Aliases.size() == 1)
16890     return Aliases[0];
16891
16892   // Construct a custom tailored token factor.
16893   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
16894 }
16895
16896 // This function tries to collect a bunch of potentially interesting
16897 // nodes to improve the chains of, all at once. This might seem
16898 // redundant, as this function gets called when visiting every store
16899 // node, so why not let the work be done on each store as it's visited?
16900 //
16901 // I believe this is mainly important because MergeConsecutiveStores
16902 // is unable to deal with merging stores of different sizes, so unless
16903 // we improve the chains of all the potential candidates up-front
16904 // before running MergeConsecutiveStores, it might only see some of
16905 // the nodes that will eventually be candidates, and then not be able
16906 // to go from a partially-merged state to the desired final
16907 // fully-merged state.
16908 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
16909   // This holds the base pointer, index, and the offset in bytes from the base
16910   // pointer.
16911   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
16912
16913   // We must have a base and an offset.
16914   if (!BasePtr.getBase().getNode())
16915     return false;
16916
16917   // Do not handle stores to undef base pointers.
16918   if (BasePtr.getBase().isUndef())
16919     return false;
16920
16921   SmallVector<StoreSDNode *, 8> ChainedStores;
16922   ChainedStores.push_back(St);
16923
16924   // Walk up the chain and look for nodes with offsets from the same
16925   // base pointer. Stop when reaching an instruction with a different kind
16926   // or instruction which has a different base pointer.
16927   StoreSDNode *Index = St;
16928   while (Index) {
16929     // If the chain has more than one use, then we can't reorder the mem ops.
16930     if (Index != St && !SDValue(Index, 0)->hasOneUse())
16931       break;
16932
16933     if (Index->isVolatile() || Index->isIndexed())
16934       break;
16935
16936     // Find the base pointer and offset for this memory node.
16937     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
16938
16939     // Check that the base pointer is the same as the original one.
16940     if (!BasePtr.equalBaseIndex(Ptr, DAG))
16941       break;
16942
16943     // Walk up the chain to find the next store node, ignoring any
16944     // intermediate loads. Any other kind of node will halt the loop.
16945     SDNode *NextInChain = Index->getChain().getNode();
16946     while (true) {
16947       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
16948         // We found a store node. Use it for the next iteration.
16949         if (STn->isVolatile() || STn->isIndexed()) {
16950           Index = nullptr;
16951           break;
16952         }
16953         ChainedStores.push_back(STn);
16954         Index = STn;
16955         break;
16956       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
16957         NextInChain = Ldn->getChain().getNode();
16958         continue;
16959       } else {
16960         Index = nullptr;
16961         break;
16962       }
16963     } // end while
16964   }
16965
16966   // At this point, ChainedStores lists all of the Store nodes
16967   // reachable by iterating up through chain nodes matching the above
16968   // conditions.  For each such store identified, try to find an
16969   // earlier chain to attach the store to which won't violate the
16970   // required ordering.
16971   bool MadeChangeToSt = false;
16972   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
16973
16974   for (StoreSDNode *ChainedStore : ChainedStores) {
16975     SDValue Chain = ChainedStore->getChain();
16976     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
16977
16978     if (Chain != BetterChain) {
16979       if (ChainedStore == St)
16980         MadeChangeToSt = true;
16981       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
16982     }
16983   }
16984
16985   // Do all replacements after finding the replacements to make to avoid making
16986   // the chains more complicated by introducing new TokenFactors.
16987   for (auto Replacement : BetterChains)
16988     replaceStoreChain(Replacement.first, Replacement.second);
16989
16990   return MadeChangeToSt;
16991 }
16992
16993 /// This is the entry point for the file.
16994 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
16995                            CodeGenOpt::Level OptLevel) {
16996   /// This is the main entry point to this class.
16997   DAGCombiner(*this, AA, OptLevel).Run(Level);
16998 }