contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  11 // both before and after the DAG is legalized.
  12 //
  13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  14 // primarily intended to handle simplification opportunities that are implicit
  15 // in the LLVM IR and exposed by the various codegen lowering phases.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #include "llvm/ADT/SetVector.h"
  20 #include "llvm/ADT/SmallBitVector.h"
  21 #include "llvm/ADT/SmallPtrSet.h"
  22 #include "llvm/ADT/SmallSet.h"
  23 #include "llvm/ADT/Statistic.h"
  24 #include "llvm/Analysis/AliasAnalysis.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineFunction.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/LLVMContext.h"
  33 #include "llvm/Support/CommandLine.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/ErrorHandling.h"
  36 #include "llvm/Support/KnownBits.h"
  37 #include "llvm/Support/MathExtras.h"
  38 #include "llvm/Support/raw_ostream.h"
  39 #include "llvm/Target/TargetLowering.h"
  40 #include "llvm/Target/TargetOptions.h"
  41 #include "llvm/Target/TargetRegisterInfo.h"
  42 #include "llvm/Target/TargetSubtargetInfo.h"
  43 #include <algorithm>
  44 using namespace llvm;
  45
   46 #define DEBUG_TYPE "dagcombine"
   47
      // Statistic counters for this pass, declared with the STATISTIC macro from
      // llvm/ADT/Statistic.h; the string is the description of each counter.
   48 STATISTIC(NodesCombined   , "Number of dag nodes combined");
   49 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
   50 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
   51 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
   52 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
   53 STATISTIC(SlicedLoads, "Number of load sliced");
  54
  55 namespace {
   56   static cl::opt<bool>
        // Hidden flag; unlike UseTBAA below, no cl::init default is given here.
   57     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
   58                cl::desc("Enable DAG combiner's use of IR alias analysis"));
   59
        // Hidden flag controlling the combiner's use of TBAA; initialized to true.
   60   static cl::opt<bool>
   61     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
   62                cl::desc("Enable DAG combiner's use of TBAA"));
   63
        // The following option is only declared when NDEBUG is not defined.
   64 #ifndef NDEBUG
   65   static cl::opt<std::string>
   66     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
   67                cl::desc("Only use DAG-combiner alias analysis in this"
   68                         " function"));
   69 #endif
   70
   71   /// Hidden option to stress test load slicing, i.e., when this option
   72   /// is enabled, load slicing bypasses most of its profitability guards.
   73   static cl::opt<bool>
   74   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
   75                     cl::desc("Bypass the profitability model of load "
   76                              "slicing"),
   77                     cl::init(false));
   78
        /// Hidden option, on by default, that lets the combiner split the
        /// indexing part off a load.
   79   static cl::opt<bool>
   80     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
   81                       cl::desc("DAG combiner may split indexing from loads"));
  82
  83 //------------------------------ DAGCombiner ---------------------------------//
  84
  85   class DAGCombiner {
   86     SelectionDAG &DAG;          // DAG being combined (see getDAG()).
   87     const TargetLowering &TLI;  // Taken from DAG.getTargetLoweringInfo().
   88     CombineLevel Level;         // Starts out as BeforeLegalizeTypes.
   89     CodeGenOpt::Level OptLevel; // Level handed to the constructor.
   90     bool LegalOperations;       // Initialized to false by the constructor.
   91     bool LegalTypes;            // Initialized to false by the constructor.
   92     bool ForCodeSize;           // optForSize() of the current function.
   93
   94     /// \brief Worklist of all of the nodes that need to be simplified.
   95     ///
   96     /// This must behave as a stack -- new nodes to process are pushed onto the
   97     /// back and when processing we pop off of the back.
   98     ///
   99     /// The worklist will not contain duplicates but may contain null entries
  100     /// due to nodes being deleted from the underlying DAG.
  101     SmallVector<SDNode *, 64> Worklist;
  102
  103     /// \brief Mapping from an SDNode to its position on the worklist.
  104     ///
  105     /// This is used to find and remove nodes from the worklist (by nulling
  106     /// them) when they are deleted from the underlying DAG. It relies on
  107     /// stable indices of nodes within the worklist.
  108     DenseMap<SDNode *, unsigned> WorklistMap;
  109
  110     /// \brief Set of nodes which have been combined (at least once).
  111     ///
  112     /// This is used to allow us to reliably add any operands of a DAG node
  113     /// which have not yet been combined to the worklist.
  114     SmallPtrSet<SDNode *, 32> CombinedNodes;
  115
  116     // AA - Used for DAG load/store alias analysis. Supplied by the caller of
          // the constructor and stored as a plain pointer.
  117     AliasAnalysis *AA;
 118
 119     /// When an instruction is simplified, add all users of the instruction to
 120     /// the work lists because they might get more simplified now.
 121     void AddUsersToWorklist(SDNode *N) {
 122       for (SDNode *Node : N->uses())
 123         AddToWorklist(Node);
 124     }
 125
  126     /// Call the node-specific routine that folds each particular type of node.
          /// Takes the node to fold and hands back the routine's SDValue result.
  127     SDValue visit(SDNode *N);
 128
 129   public:
 130     /// Add to the worklist making sure its instance is at the back (next to be
 131     /// processed.)
 132     void AddToWorklist(SDNode *N) {
 133       assert(N->getOpcode() != ISD::DELETED_NODE &&
 134              "Deleted Node added to Worklist");
 135
 136       // Skip handle nodes as they can't usefully be combined and confuse the
 137       // zero-use deletion strategy.
 138       if (N->getOpcode() == ISD::HANDLENODE)
 139         return;
 140
 141       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 142         Worklist.push_back(N);
 143     }
 144
 145     /// Remove all instances of N from the worklist.
 146     void removeFromWorklist(SDNode *N) {
 147       CombinedNodes.erase(N);
 148
 149       auto It = WorklistMap.find(N);
 150       if (It == WorklistMap.end())
 151         return; // Not in the worklist.
 152
 153       // Null out the entry rather than erasing it to avoid a linear operation.
 154       Worklist[It->second] = nullptr;
 155       WorklistMap.erase(It);
 156     }
  157
          /// Remove \p N from the worklist, queue those of its operands that
          /// have a single use or produce several values, then delete N from
          /// the DAG.
  158     void deleteAndRecombine(SDNode *N);
  159     bool recursivelyDeleteUnusedNodes(SDNode *N);
  160
  161     /// Replaces all uses of the results of one DAG node with new values.
          /// \p To points at \p NumTo replacement values; the overloads taking
          /// one or two SDValues forward to this one.
  162     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
  163                       bool AddTo = true);
 164
  165     /// Replaces all uses of the results of one DAG node with new values.
          /// Convenience form for a single replacement value \p Res; forwards
          /// to the pointer/count overload with a count of 1.
  166     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
  167       return CombineTo(N, &Res, 1, AddTo);
  168     }
 169
  170     /// Replaces all uses of the results of one DAG node with new values.
          /// Convenience form for two replacement values, passed on in the
          /// order \p Res0, \p Res1.
  171     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
  172                       bool AddTo = true) {
  173       SDValue To[] = { Res0, Res1 };
  174       return CombineTo(N, To, 2, AddTo);
  175     }
  176
  177     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
  178
  179   private:
          /// Largest getSizeInBits() among the target's legal value types
          /// (MVT::Other excluded); computed once by the constructor.
  180     unsigned MaximumLegalStoreInBits;
 181
 182     /// Check the specified integer node value to see if it can be simplified or
 183     /// if things it uses can be simplified by bit propagation.
 184     /// If so, return true.
 185     bool SimplifyDemandedBits(SDValue Op) {
 186       unsigned BitWidth = Op.getScalarValueSizeInBits();
 187       APInt Demanded = APInt::getAllOnesValue(BitWidth);
 188       return SimplifyDemandedBits(Op, Demanded);
 189     }
  190
          /// Overload taking an explicit mask of demanded bits; the
          /// one-argument form passes an all-ones mask of the scalar width.
  191     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
  192
  193     bool CombineToPreIndexedLoadStore(SDNode *N);
  194     bool CombineToPostIndexedLoadStore(SDNode *N);
  195     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  196     bool SliceUpLoad(SDNode *N);
  197
  198     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  199     ///   load.
  200     ///
  201     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  202     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  203     /// \param EltNo index of the vector element to load.
  204     /// \param OriginalLoad load that EVE came from to be replaced.
  205     /// \returns EVE on success, an empty SDValue() on failure.
  206     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
  207         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
  208     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
          // NOTE(review): PVT is presumably the promoted value type and Replace
          // an out-parameter (it is a non-const reference) -- confirm against
          // the definitions, which are not shown here.
  209     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  210     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  211     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  212     SDValue PromoteIntBinOp(SDValue Op);
  213     SDValue PromoteIntShiftOp(SDValue Op);
  214     SDValue PromoteExtend(SDValue Op);
  215     bool PromoteLoad(SDValue Op);
  216
  217     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
  218                          SDValue ExtLoad, const SDLoc &DL,
  219                          ISD::NodeType ExtType);
  220
  221     /// Call the node-specific routine that knows how to fold each
  222     /// particular type of node. If that doesn't do anything, try the
  223     /// target-specific DAG combines.
  224     SDValue combine(SDNode *N);
 225
  226     // Visitation implementation - Implement dag node combining for different
  227     // node types.  A visitXXX routine reports what it did through the
  228     // SDValue it returns:
  229     //   SDValue.getNode() == 0 - No change was made
  230     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  231     //   otherwise              - N should be replaced by the returned Operand.
  232     //
  233     SDValue visitTokenFactor(SDNode *N);
  234     SDValue visitMERGE_VALUES(SDNode *N);
  235     SDValue visitADD(SDNode *N);
  236     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  237     SDValue visitSUB(SDNode *N);
  238     SDValue visitADDC(SDNode *N);
  239     SDValue visitUADDO(SDNode *N);
  240     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  241     SDValue visitSUBC(SDNode *N);
  242     SDValue visitUSUBO(SDNode *N);
  243     SDValue visitADDE(SDNode *N);
  244     SDValue visitADDCARRY(SDNode *N);
  245     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  246     SDValue visitSUBE(SDNode *N);
  247     SDValue visitSUBCARRY(SDNode *N);
  248     SDValue visitMUL(SDNode *N);
  249     SDValue useDivRem(SDNode *N);
  250     SDValue visitSDIV(SDNode *N);
  251     SDValue visitUDIV(SDNode *N);
  252     SDValue visitREM(SDNode *N);
  253     SDValue visitMULHU(SDNode *N);
  254     SDValue visitMULHS(SDNode *N);
  255     SDValue visitSMUL_LOHI(SDNode *N);
  256     SDValue visitUMUL_LOHI(SDNode *N);
  257     SDValue visitSMULO(SDNode *N);
  258     SDValue visitUMULO(SDNode *N);
  259     SDValue visitIMINMAX(SDNode *N);
  260     SDValue visitAND(SDNode *N);
  261     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  262     SDValue visitOR(SDNode *N);
  263     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
  264     SDValue visitXOR(SDNode *N);
  265     SDValue SimplifyVBinOp(SDNode *N);
  266     SDValue visitSHL(SDNode *N);
  267     SDValue visitSRA(SDNode *N);
  268     SDValue visitSRL(SDNode *N);
  269     SDValue visitRotate(SDNode *N);
  270     SDValue visitABS(SDNode *N);
  271     SDValue visitBSWAP(SDNode *N);
  272     SDValue visitBITREVERSE(SDNode *N);
  273     SDValue visitCTLZ(SDNode *N);
  274     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  275     SDValue visitCTTZ(SDNode *N);
  276     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  277     SDValue visitCTPOP(SDNode *N);
  278     SDValue visitSELECT(SDNode *N);
  279     SDValue visitVSELECT(SDNode *N);
  280     SDValue visitSELECT_CC(SDNode *N);
  281     SDValue visitSETCC(SDNode *N);
  282     SDValue visitSETCCE(SDNode *N);
  283     SDValue visitSETCCCARRY(SDNode *N);
  284     SDValue visitSIGN_EXTEND(SDNode *N);
  285     SDValue visitZERO_EXTEND(SDNode *N);
  286     SDValue visitANY_EXTEND(SDNode *N);
  287     SDValue visitAssertZext(SDNode *N);
  288     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  289     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  290     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  291     SDValue visitTRUNCATE(SDNode *N);
  292     SDValue visitBITCAST(SDNode *N);
  293     SDValue visitBUILD_PAIR(SDNode *N);
  294     SDValue visitFADD(SDNode *N);
  295     SDValue visitFSUB(SDNode *N);
  296     SDValue visitFMUL(SDNode *N);
  297     SDValue visitFMA(SDNode *N);
  298     SDValue visitFDIV(SDNode *N);
  299     SDValue visitFREM(SDNode *N);
  300     SDValue visitFSQRT(SDNode *N);
  301     SDValue visitFCOPYSIGN(SDNode *N);
  302     SDValue visitSINT_TO_FP(SDNode *N);
  303     SDValue visitUINT_TO_FP(SDNode *N);
  304     SDValue visitFP_TO_SINT(SDNode *N);
  305     SDValue visitFP_TO_UINT(SDNode *N);
  306     SDValue visitFP_ROUND(SDNode *N);
  307     SDValue visitFP_ROUND_INREG(SDNode *N);
  308     SDValue visitFP_EXTEND(SDNode *N);
  309     SDValue visitFNEG(SDNode *N);
  310     SDValue visitFABS(SDNode *N);
  311     SDValue visitFCEIL(SDNode *N);
  312     SDValue visitFTRUNC(SDNode *N);
  313     SDValue visitFFLOOR(SDNode *N);
  314     SDValue visitFMINNUM(SDNode *N);
  315     SDValue visitFMAXNUM(SDNode *N);
  316     SDValue visitBRCOND(SDNode *N);
  317     SDValue visitBR_CC(SDNode *N);
  318     SDValue visitLOAD(SDNode *N);
  319
          // Store helpers; both take the StoreSDNode being rewritten.
  320     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  321     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
  322
  323     SDValue visitSTORE(SDNode *N);
  324     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  325     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  326     SDValue visitBUILD_VECTOR(SDNode *N);
  327     SDValue visitCONCAT_VECTORS(SDNode *N);
  328     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  329     SDValue visitVECTOR_SHUFFLE(SDNode *N);
  330     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  331     SDValue visitINSERT_SUBVECTOR(SDNode *N);
  332     SDValue visitMLOAD(SDNode *N);
  333     SDValue visitMSTORE(SDNode *N);
  334     SDValue visitMGATHER(SDNode *N);
  335     SDValue visitMSCATTER(SDNode *N);
  336     SDValue visitFP_TO_FP16(SDNode *N);
  337     SDValue visitFP16_TO_FP(SDNode *N);
  338
          // Helpers used by the visit routines above. Their definitions are
          // not part of this declaration list.
  339     SDValue visitFADDForFMACombine(SDNode *N);
  340     SDValue visitFSUBForFMACombine(SDNode *N);
  341     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
  342
  343     SDValue XformToShuffleWithZero(SDNode *N);
  344     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
  345                            SDValue RHS);
  346
  347     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
  348
  349     SDValue foldSelectOfConstants(SDNode *N);
  350     SDValue foldBinOpIntoSelect(SDNode *BO);
  351     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  352     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
  353     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  354     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
  355                              SDValue N2, SDValue N3, ISD::CondCode CC,
  356                              bool NotExtCompare = false);
  357     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
  358                                    SDValue N2, SDValue N3, ISD::CondCode CC);
  359     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
  360                               const SDLoc &DL);
  361     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
  362                           const SDLoc &DL, bool foldBooleans = true);
  363
          // NOTE(review): LHS, RHS and CC are non-const references, presumably
          // outputs filled in when the match succeeds -- confirm at the
          // definition.
  364     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
  365                            SDValue &CC) const;
  366     bool isOneUseSetCC(SDValue N) const;
  367
  368     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
  369                                          unsigned HiOp);
  370     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  371     SDValue CombineExtLoad(SDNode *N);
  372     SDValue combineRepeatedFPDivisors(SDNode *N);
  373     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  374     SDValue BuildSDIV(SDNode *N);
  375     SDValue BuildSDIVPow2(SDNode *N);
  376     SDValue BuildUDIV(SDNode *N);
  377     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
  378     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  379     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  380     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  381     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  382     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
  383                                 SDNodeFlags Flags, bool Reciprocal);
  384     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
  385                                 SDNodeFlags Flags, bool Reciprocal);
  386     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
  387                                bool DemandHighBits = true);
  388     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  389     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
  390                               SDValue InnerPos, SDValue InnerNeg,
  391                               unsigned PosOpcode, unsigned NegOpcode,
  392                               const SDLoc &DL);
  393     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  394     SDValue MatchLoadCombine(SDNode *N);
  395     SDValue ReduceLoadWidth(SDNode *N);
  396     SDValue ReduceLoadOpStoreWidth(SDNode *N);
  397     SDValue splitMergedValStore(StoreSDNode *ST);
  398     SDValue TransformFPLoadStorePair(SDNode *N);
  399     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  400     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
  401     SDValue reduceBuildVecToShuffle(SDNode *N);
  402     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
  403                                   ArrayRef<int> VectorMask, SDValue VecIn1,
  404                                   SDValue VecIn2, unsigned LeftIdx);
  405     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
  406
  407     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
 408
  409     /// Walk up the chain, skipping non-aliasing memory nodes, looking for
  410     /// aliasing nodes and adding them to the \p Aliases vector.
  411     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
  412                           SmallVectorImpl<SDValue> &Aliases);
  413
  414     /// Return true if there is any possibility that the two addresses overlap.
  415     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
  416
  417     /// Walk up the chain, skipping non-aliasing memory nodes, looking for a
  418     /// better chain (i.e. an aliasing node).
  419     SDValue FindBetterChain(SDNode *N, SDValue Chain);
  420
  421     /// Try to replace a store and any possibly adjacent stores on
  422     /// consecutive chains with better chains. Return true only if St is
  423     /// replaced.
  424     ///
  425     /// Notice that other chains may still be replaced even if the function
  426     /// returns false.
  427     bool findBetterNeighborChains(StoreSDNode *St);
  428
  429     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
  430     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
 431
 432     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 433     /// is located in a sequence of memory operations connected by a chain.
 434     struct MemOpLink {
 435       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 436           : MemNode(N), OffsetFromBase(Offset) {}
 437       // Ptr to the mem node.
 438       LSBaseSDNode *MemNode;
 439       // Offset from the base ptr.
 440       int64_t OffsetFromBase;
 441     };
 442
  443     /// This is a helper function for visitMUL to check the profitability
  444     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  445     /// MulNode is the original multiply, AddNode is (add x, c1),
  446     /// and ConstNode is c2.
  447     bool isMulAddWithConstProfitable(SDNode *MulNode,
  448                                      SDValue &AddNode,
  449                                      SDValue &ConstNode);
  450
  451
  452     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
  453     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
  454     /// the type of the loaded value to be extended.  LoadedVT returns the type
  455     /// of the original loaded value.  NarrowLoad returns whether the load would
  456     /// need to be narrowed in order to match.
  457     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
  458                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
  459                           bool &NarrowLoad);
  460
  461     /// Helper function for MergeConsecutiveStores which merges the
  462     /// component store chains.
  463     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
  464                                 unsigned NumStores);
  465
  466     /// This is a helper function for MergeConsecutiveStores. When the source
  467     /// elements of the consecutive stores are all constants or all extracted
  468     /// vector elements, try to merge them into one larger store.
  469     /// \return True if a merged store was created.
  470     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
  471                                          EVT MemVT, unsigned NumStores,
  472                                          bool IsConstantSrc, bool UseVector);
  473
  474     /// This is a helper function for MergeConsecutiveStores.
  475     /// Stores that may be merged are placed in StoreNodes.
  476     void getStoreMergeCandidates(StoreSDNode *St,
  477                                  SmallVectorImpl<MemOpLink> &StoreNodes);
  478
  479     /// Helper function for MergeConsecutiveStores. Checks if
  480     /// Candidate stores have indirect dependency through their
  481     /// operands. \return True if safe to merge.
  482     bool checkMergeStoreCandidatesForDependencies(
  483         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
  484
  485     /// Merge consecutive store operations into a wide store.
  486     /// This optimization uses wide integers or vectors when possible.
  487     /// \return true if any stores were merged. (The result is a bool, not a
  488     /// count, and this function takes no StoreNodes argument.)
  489     bool MergeConsecutiveStores(StoreSDNode *N);
  490
  491     /// \brief Try to transform a truncation where C is a constant:
  492     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
  493     ///
  494     /// \p N needs to be a truncation and its first operand an AND. Other
  495     /// requirements are checked by the function (e.g. that trunc is
  496     /// single-use) and if missed an empty SDValue is returned.
  497     SDValue distributeTruncateThroughAnd(SDNode *N);
 498
 499   public:
 500     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 501         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 502           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
 503       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
 504
 505       MaximumLegalStoreInBits = 0;
 506       for (MVT VT : MVT::all_valuetypes())
 507         if (EVT(VT).isSimple() && VT != MVT::Other &&
 508             TLI.isTypeLegal(EVT(VT)) &&
 509             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 510           MaximumLegalStoreInBits = VT.getSizeInBits();
 511     }
 512
  513     /// Runs the dag combiner on all nodes in the work list, at the combine
          /// level given by \p AtLevel.
  514     void Run(CombineLevel AtLevel);
  515
          /// Accessor for the SelectionDAG this combiner operates on.
  516     SelectionDAG &getDAG() const { return DAG; }
 517
 518     /// Returns a type large enough to hold any valid shift amount - before type
 519     /// legalization these can be huge.
 520     EVT getShiftAmountTy(EVT LHSTy) {
 521       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 522       if (LHSTy.isVector())
 523         return LHSTy;
 524       auto &DL = DAG.getDataLayout();
 525       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
 526                         : TLI.getPointerTy(DL);
 527     }
 528
 529     /// This method returns true if we are running before type legalization or
 530     /// if the specified VT is legal.
 531     bool isTypeLegal(const EVT &VT) {
 532       if (!LegalTypes) return true;
 533       return TLI.isTypeLegal(VT);
 534     }
 535
 536     /// Convenience wrapper around TargetLowering::getSetCCResultType
 537     EVT getSetCCResultType(EVT VT) const {
 538       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 539     }
 540   };
 541 }
 542
 543
 544 namespace {
  545 /// This class is a DAGUpdateListener that removes any deleted
  546 /// nodes from the worklist.
  547 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  548   DAGCombiner &DC; // Combiner whose worklist is kept in sync.
  549 public:
        /// Constructs the base listener with the DAG obtained from \p dc.
  550   explicit WorklistRemover(DAGCombiner &dc)
  551     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
  552
        /// Forwards the deletion of \p N to the combiner; \p E is not used.
  553   void NodeDeleted(SDNode *N, SDNode *E) override {
  554     DC.removeFromWorklist(N);
  555   }
  556 };
 557 }
 558
 559 //===----------------------------------------------------------------------===//
 560 //  TargetLowering::DAGCombinerInfo implementation
 561 //===----------------------------------------------------------------------===//
 562
 563 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 564   ((DAGCombiner*)DC)->AddToWorklist(N);
 565 }
 566
 567 SDValue TargetLowering::DAGCombinerInfo::
 568 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 569   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 570 }
 571
 572 SDValue TargetLowering::DAGCombinerInfo::
 573 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 574   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 575 }
 576
 577
 578 SDValue TargetLowering::DAGCombinerInfo::
 579 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 580   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 581 }
 582
 583 void TargetLowering::DAGCombinerInfo::
 584 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 585   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 586 }
 587
 588 //===----------------------------------------------------------------------===//
 589 // Helper Functions
 590 //===----------------------------------------------------------------------===//
 591
 592 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 593   removeFromWorklist(N);
 594
 595   // If the operands of this node are only used by the node, they will now be
 596   // dead. Make sure to re-visit them and recursively delete dead nodes.
 597   for (const SDValue &Op : N->ops())
 598     // For an operand generating multiple values, one of the values may
 599     // become dead allowing further simplification (e.g. split index
 600     // arithmetic from an indexed load).
 601     if (Op->hasOneUse() || Op->getNumValues() > 1)
 602       AddToWorklist(Op.getNode());
 603
 604   DAG.DeleteNode(N);
 605 }
 606
 607 /// Return 1 if we can compute the negated form of the specified expression for
 608 /// the same cost as the expression itself, or 2 if we can compute the negated
 609 /// form more cheaply than the expression itself.
 610 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 611                                const TargetLowering &TLI,
 612                                const TargetOptions *Options,
 613                                unsigned Depth = 0) {
 614   // fneg is removable even if it has multiple uses.
 615   if (Op.getOpcode() == ISD::FNEG) return 2;
 616
 617   // Don't allow anything with multiple uses.
 618   if (!Op.hasOneUse()) return 0;
 619
 620   // Don't recurse exponentially.
 621   if (Depth > 6) return 0;
 622
 623   switch (Op.getOpcode()) {
 624   default: return false;
 625   case ISD::ConstantFP: {
 626     if (!LegalOperations)
 627       return 1;
 628
 629     // Don't invert constant FP values after legalization unless the target says
 630     // the negated constant is legal.
 631     EVT VT = Op.getValueType();
 632     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 633       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
 634   }
 635   case ISD::FADD:
 636     // FIXME: determine better conditions for this xform.
 637     if (!Options->UnsafeFPMath) return 0;
 638
 639     // After operation legalization, it might not be legal to create new FSUBs.
 640     if (LegalOperations &&
 641         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
 642       return 0;
 643
 644     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 645     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 646                                     Options, Depth + 1))
 647       return V;
 648     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 649     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 650                               Depth + 1);
 651   case ISD::FSUB:
 652     // We can't turn -(A-B) into B-A when we honor signed zeros.
 653     if (!Options->NoSignedZerosFPMath &&
 654         !Op.getNode()->getFlags().hasNoSignedZeros())
 655       return 0;
 656
 657     // fold (fneg (fsub A, B)) -> (fsub B, A)
 658     return 1;
 659
 660   case ISD::FMUL:
 661   case ISD::FDIV:
 662     if (Options->HonorSignDependentRoundingFPMath()) return 0;
 663
 664     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 665     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 666                                     Options, Depth + 1))
 667       return V;
 668
 669     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 670                               Depth + 1);
 671
 672   case ISD::FP_EXTEND:
 673   case ISD::FP_ROUND:
 674   case ISD::FSIN:
 675     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 676                               Depth + 1);
 677   }
 678 }
 679
 680 /// If isNegatibleForFree returns true, return the newly negated expression.
 681 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 682                                     bool LegalOperations, unsigned Depth = 0) {
 683   const TargetOptions &Options = DAG.getTarget().Options;
 684   // fneg is removable even if it has multiple uses.
 685   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
 686
 687   // Don't allow anything with multiple uses.
 688   assert(Op.hasOneUse() && "Unknown reuse!");
 689
 690   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 691
 692   const SDNodeFlags Flags = Op.getNode()->getFlags();
 693
 694   switch (Op.getOpcode()) {
 695   default: llvm_unreachable("Unknown code");
 696   case ISD::ConstantFP: {
 697     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 698     V.changeSign();
 699     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 700   }
 701   case ISD::FADD:
 702     // FIXME: determine better conditions for this xform.
 703     assert(Options.UnsafeFPMath);
 704
 705     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 706     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 707                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 708       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 709                          GetNegatedExpression(Op.getOperand(0), DAG,
 710                                               LegalOperations, Depth+1),
 711                          Op.getOperand(1), Flags);
 712     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 713     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 714                        GetNegatedExpression(Op.getOperand(1), DAG,
 715                                             LegalOperations, Depth+1),
 716                        Op.getOperand(0), Flags);
 717   case ISD::FSUB:
 718     // fold (fneg (fsub 0, B)) -> B
 719     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
 720       if (N0CFP->isZero())
 721         return Op.getOperand(1);
 722
 723     // fold (fneg (fsub A, B)) -> (fsub B, A)
 724     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 725                        Op.getOperand(1), Op.getOperand(0), Flags);
 726
 727   case ISD::FMUL:
 728   case ISD::FDIV:
 729     assert(!Options.HonorSignDependentRoundingFPMath());
 730
 731     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 732     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 733                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 734       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 735                          GetNegatedExpression(Op.getOperand(0), DAG,
 736                                               LegalOperations, Depth+1),
 737                          Op.getOperand(1), Flags);
 738
 739     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 740     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 741                        Op.getOperand(0),
 742                        GetNegatedExpression(Op.getOperand(1), DAG,
 743                                             LegalOperations, Depth+1), Flags);
 744
 745   case ISD::FP_EXTEND:
 746   case ISD::FSIN:
 747     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 748                        GetNegatedExpression(Op.getOperand(0), DAG,
 749                                             LegalOperations, Depth+1));
 750   case ISD::FP_ROUND:
 751       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 752                          GetNegatedExpression(Op.getOperand(0), DAG,
 753                                               LegalOperations, Depth+1),
 754                          Op.getOperand(1));
 755   }
 756 }
 757
 758 // APInts must be the same size for most operations, this helper
 759 // function zero extends the shorter of the pair so that they match.
 760 // We provide an Offset so that we can create bitwidths that won't overflow.
 761 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 762   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 763   LHS = LHS.zextOrSelf(Bits);
 764   RHS = RHS.zextOrSelf(Bits);
 765 }
 766
 767 // Return true if this node is a setcc, or is a select_cc
 768 // that selects between the target values used for true and false, making it
 769 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 770 // the appropriate nodes based on the type of node we are checking. This
 771 // simplifies life a bit for the callers.
 772 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 773                                     SDValue &CC) const {
 774   if (N.getOpcode() == ISD::SETCC) {
 775     LHS = N.getOperand(0);
 776     RHS = N.getOperand(1);
 777     CC  = N.getOperand(2);
 778     return true;
 779   }
 780
 781   if (N.getOpcode() != ISD::SELECT_CC ||
 782       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 783       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 784     return false;
 785
 786   if (TLI.getBooleanContents(N.getValueType()) ==
 787       TargetLowering::UndefinedBooleanContent)
 788     return false;
 789
 790   LHS = N.getOperand(0);
 791   RHS = N.getOperand(1);
 792   CC  = N.getOperand(4);
 793   return true;
 794 }
 795
 796 /// Return true if this is a SetCC-equivalent operation with only one use.
 797 /// If this is true, it allows the users to invert the operation for free when
 798 /// it is profitable to do so.
 799 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 800   SDValue N0, N1, N2;
 801   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 802     return true;
 803   return false;
 804 }
 805
 806 // \brief Returns the SDNode if it is a constant float BuildVector
 807 // or constant float.
 808 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 809   if (isa<ConstantFPSDNode>(N))
 810     return N.getNode();
 811   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 812     return N.getNode();
 813   return nullptr;
 814 }
 815
 816 // Determines if it is a constant integer or a build vector of constant
 817 // integers (and undefs).
 818 // Do not permit build vector implicit truncation.
 819 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 820   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 821     return !(Const->isOpaque() && NoOpaques);
 822   if (N.getOpcode() != ISD::BUILD_VECTOR)
 823     return false;
 824   unsigned BitWidth = N.getScalarValueSizeInBits();
 825   for (const SDValue &Op : N->op_values()) {
 826     if (Op.isUndef())
 827       continue;
 828     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 829     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 830         (Const->isOpaque() && NoOpaques))
 831       return false;
 832   }
 833   return true;
 834 }
 835
 836 // Determines if it is a constant null integer or a splatted vector of a
 837 // constant null integer (with no undefs).
 838 // Build vector implicit truncation is not an issue for null values.
 839 static bool isNullConstantOrNullSplatConstant(SDValue N) {
 840   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 841     return Splat->isNullValue();
 842   return false;
 843 }
 844
 845 // Determines if it is a constant integer of one or a splatted vector of a
 846 // constant integer of one (with no undefs).
 847 // Do not permit build vector implicit truncation.
 848 static bool isOneConstantOrOneSplatConstant(SDValue N) {
 849   unsigned BitWidth = N.getScalarValueSizeInBits();
 850   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 851     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
 852   return false;
 853 }
 854
 855 // Determines if it is a constant integer of all ones or a splatted vector of a
 856 // constant integer of all ones (with no undefs).
 857 // Do not permit build vector implicit truncation.
 858 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
 859   unsigned BitWidth = N.getScalarValueSizeInBits();
 860   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 861     return Splat->isAllOnesValue() &&
 862            Splat->getAPIntValue().getBitWidth() == BitWidth;
 863   return false;
 864 }
 865
 866 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 867 // undef's.
 868 static bool isAnyConstantBuildVector(const SDNode *N) {
 869   return ISD::isBuildVectorOfConstantSDNodes(N) ||
 870          ISD::isBuildVectorOfConstantFPSDNodes(N);
 871 }
 872
 873 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 874                                     SDValue N1) {
 875   EVT VT = N0.getValueType();
 876   if (N0.getOpcode() == Opc) {
 877     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 878       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 879         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
 880         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
 881           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 882         return SDValue();
 883       }
 884       if (N0.hasOneUse()) {
 885         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
 886         // use
 887         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 888         if (!OpNode.getNode())
 889           return SDValue();
 890         AddToWorklist(OpNode.getNode());
 891         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 892       }
 893     }
 894   }
 895
 896   if (N1.getOpcode() == Opc) {
 897     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 898       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 899         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
 900         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
 901           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
 902         return SDValue();
 903       }
 904       if (N1.hasOneUse()) {
 905         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
 906         // use
 907         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
 908         if (!OpNode.getNode())
 909           return SDValue();
 910         AddToWorklist(OpNode.getNode());
 911         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
 912       }
 913     }
 914   }
 915
 916   return SDValue();
 917 }
 918
 919 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 920                                bool AddTo) {
 921   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 922   ++NodesCombined;
 923   DEBUG(dbgs() << "\nReplacing.1 ";
 924         N->dump(&DAG);
 925         dbgs() << "\nWith: ";
 926         To[0].getNode()->dump(&DAG);
 927         dbgs() << " and " << NumTo-1 << " other values\n");
 928   for (unsigned i = 0, e = NumTo; i != e; ++i)
 929     assert((!To[i].getNode() ||
 930             N->getValueType(i) == To[i].getValueType()) &&
 931            "Cannot combine value to value of different type!");
 932
 933   WorklistRemover DeadNodes(*this);
 934   DAG.ReplaceAllUsesWith(N, To);
 935   if (AddTo) {
 936     // Push the new nodes and any users onto the worklist
 937     for (unsigned i = 0, e = NumTo; i != e; ++i) {
 938       if (To[i].getNode()) {
 939         AddToWorklist(To[i].getNode());
 940         AddUsersToWorklist(To[i].getNode());
 941       }
 942     }
 943   }
 944
 945   // Finally, if the node is now dead, remove it from the graph.  The node
 946   // may not be dead if the replacement process recursively simplified to
 947   // something else needing this node.
 948   if (N->use_empty())
 949     deleteAndRecombine(N);
 950   return SDValue(N, 0);
 951 }
 952
 953 void DAGCombiner::
 954 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 955   // Replace all uses.  If any nodes become isomorphic to other nodes and
 956   // are deleted, make sure to remove them from our worklist.
 957   WorklistRemover DeadNodes(*this);
 958   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
 959
 960   // Push the new node and any (possibly new) users onto the worklist.
 961   AddToWorklist(TLO.New.getNode());
 962   AddUsersToWorklist(TLO.New.getNode());
 963
 964   // Finally, if the node is now dead, remove it from the graph.  The node
 965   // may not be dead if the replacement process recursively simplified to
 966   // something else needing this node.
 967   if (TLO.Old.getNode()->use_empty())
 968     deleteAndRecombine(TLO.Old.getNode());
 969 }
 970
 971 /// Check the specified integer node value to see if it can be simplified or if
 972 /// things it uses can be simplified by bit propagation. If so, return true.
 973 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 974   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
 975   KnownBits Known;
 976   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
 977     return false;
 978
 979   // Revisit the node.
 980   AddToWorklist(Op.getNode());
 981
 982   // Replace the old value with the new one.
 983   ++NodesCombined;
 984   DEBUG(dbgs() << "\nReplacing.2 ";
 985         TLO.Old.getNode()->dump(&DAG);
 986         dbgs() << "\nWith: ";
 987         TLO.New.getNode()->dump(&DAG);
 988         dbgs() << '\n');
 989
 990   CommitTargetLoweringOpt(TLO);
 991   return true;
 992 }
 993
 994 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
 995   SDLoc DL(Load);
 996   EVT VT = Load->getValueType(0);
 997   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
 998
 999   DEBUG(dbgs() << "\nReplacing.9 ";
1000         Load->dump(&DAG);
1001         dbgs() << "\nWith: ";
1002         Trunc.getNode()->dump(&DAG);
1003         dbgs() << '\n');
1004   WorklistRemover DeadNodes(*this);
1005   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1006   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1007   deleteAndRecombine(Load);
1008   AddToWorklist(Trunc.getNode());
1009 }
1010
1011 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1012   Replace = false;
1013   SDLoc DL(Op);
1014   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1015     LoadSDNode *LD = cast<LoadSDNode>(Op);
1016     EVT MemVT = LD->getMemoryVT();
1017     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1018       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1019                                                        : ISD::EXTLOAD)
1020       : LD->getExtensionType();
1021     Replace = true;
1022     return DAG.getExtLoad(ExtType, DL, PVT,
1023                           LD->getChain(), LD->getBasePtr(),
1024                           MemVT, LD->getMemOperand());
1025   }
1026
1027   unsigned Opc = Op.getOpcode();
1028   switch (Opc) {
1029   default: break;
1030   case ISD::AssertSext:
1031     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1032       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1033     break;
1034   case ISD::AssertZext:
1035     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1036       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1037     break;
1038   case ISD::Constant: {
1039     unsigned ExtOpc =
1040       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1041     return DAG.getNode(ExtOpc, DL, PVT, Op);
1042   }
1043   }
1044
1045   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1046     return SDValue();
1047   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1048 }
1049
1050 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1051   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1052     return SDValue();
1053   EVT OldVT = Op.getValueType();
1054   SDLoc DL(Op);
1055   bool Replace = false;
1056   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1057   if (!NewOp.getNode())
1058     return SDValue();
1059   AddToWorklist(NewOp.getNode());
1060
1061   if (Replace)
1062     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1063   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1064                      DAG.getValueType(OldVT));
1065 }
1066
1067 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1068   EVT OldVT = Op.getValueType();
1069   SDLoc DL(Op);
1070   bool Replace = false;
1071   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1072   if (!NewOp.getNode())
1073     return SDValue();
1074   AddToWorklist(NewOp.getNode());
1075
1076   if (Replace)
1077     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1078   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1079 }
1080
1081 /// Promote the specified integer binary operation if the target indicates it is
1082 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1083 /// i32 since i16 instructions are longer.
1084 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1085   if (!LegalOperations)
1086     return SDValue();
1087
1088   EVT VT = Op.getValueType();
1089   if (VT.isVector() || !VT.isInteger())
1090     return SDValue();
1091
1092   // If operation type is 'undesirable', e.g. i16 on x86, consider
1093   // promoting it.
1094   unsigned Opc = Op.getOpcode();
1095   if (TLI.isTypeDesirableForOp(Opc, VT))
1096     return SDValue();
1097
1098   EVT PVT = VT;
1099   // Consult target whether it is a good idea to promote this operation and
1100   // what's the right type to promote it to.
1101   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1102     assert(PVT != VT && "Don't know what type to promote to!");
1103
1104     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1105
1106     bool Replace0 = false;
1107     SDValue N0 = Op.getOperand(0);
1108     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1109
1110     bool Replace1 = false;
1111     SDValue N1 = Op.getOperand(1);
1112     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1113     SDLoc DL(Op);
1114
1115     SDValue RV =
1116         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1117
1118     // New replace instances of N0 and N1
1119     if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
1120         NN0.getOpcode() != ISD::DELETED_NODE) {
1121       AddToWorklist(NN0.getNode());
1122       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1123     }
1124
1125     if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
1126         NN1.getOpcode() != ISD::DELETED_NODE) {
1127       AddToWorklist(NN1.getNode());
1128       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1129     }
1130
1131     // Deal with Op being deleted.
1132     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1133       return RV;
1134   }
1135   return SDValue();
1136 }
1137
1138 /// Promote the specified integer shift operation if the target indicates it is
1139 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1140 /// i32 since i16 instructions are longer.
1141 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1142   if (!LegalOperations)
1143     return SDValue();
1144
1145   EVT VT = Op.getValueType();
1146   if (VT.isVector() || !VT.isInteger())
1147     return SDValue();
1148
1149   // If operation type is 'undesirable', e.g. i16 on x86, consider
1150   // promoting it.
1151   unsigned Opc = Op.getOpcode();
1152   if (TLI.isTypeDesirableForOp(Opc, VT))
1153     return SDValue();
1154
1155   EVT PVT = VT;
1156   // Consult target whether it is a good idea to promote this operation and
1157   // what's the right type to promote it to.
1158   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1159     assert(PVT != VT && "Don't know what type to promote to!");
1160
1161     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1162
1163     bool Replace = false;
1164     SDValue N0 = Op.getOperand(0);
1165     SDValue N1 = Op.getOperand(1);
1166     if (Opc == ISD::SRA)
1167       N0 = SExtPromoteOperand(N0, PVT);
1168     else if (Opc == ISD::SRL)
1169       N0 = ZExtPromoteOperand(N0, PVT);
1170     else
1171       N0 = PromoteOperand(N0, PVT, Replace);
1172
1173     if (!N0.getNode())
1174       return SDValue();
1175
1176     SDLoc DL(Op);
1177     SDValue RV =
1178         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1179
1180     AddToWorklist(N0.getNode());
1181     if (Replace)
1182       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1183
1184     // Deal with Op being deleted.
1185     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1186       return RV;
1187   }
1188   return SDValue();
1189 }
1190
1191 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1192   if (!LegalOperations)
1193     return SDValue();
1194
1195   EVT VT = Op.getValueType();
1196   if (VT.isVector() || !VT.isInteger())
1197     return SDValue();
1198
1199   // If operation type is 'undesirable', e.g. i16 on x86, consider
1200   // promoting it.
1201   unsigned Opc = Op.getOpcode();
1202   if (TLI.isTypeDesirableForOp(Opc, VT))
1203     return SDValue();
1204
1205   EVT PVT = VT;
1206   // Consult target whether it is a good idea to promote this operation and
1207   // what's the right type to promote it to.
1208   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1209     assert(PVT != VT && "Don't know what type to promote to!");
1210     // fold (aext (aext x)) -> (aext x)
1211     // fold (aext (zext x)) -> (zext x)
1212     // fold (aext (sext x)) -> (sext x)
1213     DEBUG(dbgs() << "\nPromoting ";
1214           Op.getNode()->dump(&DAG));
1215     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1216   }
1217   return SDValue();
1218 }
1219
1220 bool DAGCombiner::PromoteLoad(SDValue Op) {
1221   if (!LegalOperations)
1222     return false;
1223
1224   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1225     return false;
1226
1227   EVT VT = Op.getValueType();
1228   if (VT.isVector() || !VT.isInteger())
1229     return false;
1230
1231   // If operation type is 'undesirable', e.g. i16 on x86, consider
1232   // promoting it.
1233   unsigned Opc = Op.getOpcode();
1234   if (TLI.isTypeDesirableForOp(Opc, VT))
1235     return false;
1236
1237   EVT PVT = VT;
1238   // Consult target whether it is a good idea to promote this operation and
1239   // what's the right type to promote it to.
1240   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1241     assert(PVT != VT && "Don't know what type to promote to!");
1242
1243     SDLoc DL(Op);
1244     SDNode *N = Op.getNode();
1245     LoadSDNode *LD = cast<LoadSDNode>(N);
1246     EVT MemVT = LD->getMemoryVT();
1247     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1248       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1249                                                        : ISD::EXTLOAD)
1250       : LD->getExtensionType();
1251     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1252                                    LD->getChain(), LD->getBasePtr(),
1253                                    MemVT, LD->getMemOperand());
1254     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1255
1256     DEBUG(dbgs() << "\nPromoting ";
1257           N->dump(&DAG);
1258           dbgs() << "\nTo: ";
1259           Result.getNode()->dump(&DAG);
1260           dbgs() << '\n');
1261     WorklistRemover DeadNodes(*this);
1262     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1263     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1264     deleteAndRecombine(N);
1265     AddToWorklist(Result.getNode());
1266     return true;
1267   }
1268   return false;
1269 }
1270
1271 /// \brief Recursively delete a node which has no uses and any operands for
1272 /// which it is the only use.
1273 ///
1274 /// Note that this both deletes the nodes and removes them from the worklist.
1275 /// It also adds any nodes who have had a user deleted to the worklist as they
1276 /// may now have only one use and subject to other combines.
1277 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1278   if (!N->use_empty())
1279     return false;
1280
1281   SmallSetVector<SDNode *, 16> Nodes;
1282   Nodes.insert(N);
1283   do {
1284     N = Nodes.pop_back_val();
1285     if (!N)
1286       continue;
1287
1288     if (N->use_empty()) {
1289       for (const SDValue &ChildN : N->op_values())
1290         Nodes.insert(ChildN.getNode());
1291
1292       removeFromWorklist(N);
1293       DAG.DeleteNode(N);
1294     } else {
1295       AddToWorklist(N);
1296     }
1297   } while (!Nodes.empty());
1298   return true;
1299 }
1300
1301 //===----------------------------------------------------------------------===//
1302 //  Main DAG Combiner implementation
1303 //===----------------------------------------------------------------------===//
1304
// Run one combine pass over the whole DAG at the given level: seed the
// worklist with every node, then repeatedly pop a node, delete it if it is
// dead, re-legalize it when running after DAG legalization, and replace it
// with whatever combine() returns, until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  // WorklistMap is what decides emptiness; Worklist itself may still hold
  // null placeholder entries.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    // Every non-null worklist entry must have a matching map entry.
    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Revisit everything legalization touched, along with its users.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // LegalizeOp reported that N is no longer valid; do not combine it.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // An empty result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Same number of results: replace node for node. Otherwise N must have a
    // single result, which is replaced by the value RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1409
1410 SDValue DAGCombiner::visit(SDNode *N) {
1411   switch (N->getOpcode()) {
1412   default: break;
1413   case ISD::TokenFactor:        return visitTokenFactor(N);
1414   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1415   case ISD::ADD:                return visitADD(N);
1416   case ISD::SUB:                return visitSUB(N);
1417   case ISD::ADDC:               return visitADDC(N);
1418   case ISD::UADDO:              return visitUADDO(N);
1419   case ISD::SUBC:               return visitSUBC(N);
1420   case ISD::USUBO:              return visitUSUBO(N);
1421   case ISD::ADDE:               return visitADDE(N);
1422   case ISD::ADDCARRY:           return visitADDCARRY(N);
1423   case ISD::SUBE:               return visitSUBE(N);
1424   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1425   case ISD::MUL:                return visitMUL(N);
1426   case ISD::SDIV:               return visitSDIV(N);
1427   case ISD::UDIV:               return visitUDIV(N);
1428   case ISD::SREM:
1429   case ISD::UREM:               return visitREM(N);
1430   case ISD::MULHU:              return visitMULHU(N);
1431   case ISD::MULHS:              return visitMULHS(N);
1432   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1433   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1434   case ISD::SMULO:              return visitSMULO(N);
1435   case ISD::UMULO:              return visitUMULO(N);
1436   case ISD::SMIN:
1437   case ISD::SMAX:
1438   case ISD::UMIN:
1439   case ISD::UMAX:               return visitIMINMAX(N);
1440   case ISD::AND:                return visitAND(N);
1441   case ISD::OR:                 return visitOR(N);
1442   case ISD::XOR:                return visitXOR(N);
1443   case ISD::SHL:                return visitSHL(N);
1444   case ISD::SRA:                return visitSRA(N);
1445   case ISD::SRL:                return visitSRL(N);
1446   case ISD::ROTR:
1447   case ISD::ROTL:               return visitRotate(N);
1448   case ISD::ABS:                return visitABS(N);
1449   case ISD::BSWAP:              return visitBSWAP(N);
1450   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1451   case ISD::CTLZ:               return visitCTLZ(N);
1452   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1453   case ISD::CTTZ:               return visitCTTZ(N);
1454   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1455   case ISD::CTPOP:              return visitCTPOP(N);
1456   case ISD::SELECT:             return visitSELECT(N);
1457   case ISD::VSELECT:            return visitVSELECT(N);
1458   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1459   case ISD::SETCC:              return visitSETCC(N);
1460   case ISD::SETCCE:             return visitSETCCE(N);
1461   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1462   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1463   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1464   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1465   case ISD::AssertZext:         return visitAssertZext(N);
1466   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1467   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1468   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1469   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1470   case ISD::BITCAST:            return visitBITCAST(N);
1471   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1472   case ISD::FADD:               return visitFADD(N);
1473   case ISD::FSUB:               return visitFSUB(N);
1474   case ISD::FMUL:               return visitFMUL(N);
1475   case ISD::FMA:                return visitFMA(N);
1476   case ISD::FDIV:               return visitFDIV(N);
1477   case ISD::FREM:               return visitFREM(N);
1478   case ISD::FSQRT:              return visitFSQRT(N);
1479   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1480   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1481   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1482   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1483   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1484   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1485   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1486   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1487   case ISD::FNEG:               return visitFNEG(N);
1488   case ISD::FABS:               return visitFABS(N);
1489   case ISD::FFLOOR:             return visitFFLOOR(N);
1490   case ISD::FMINNUM:            return visitFMINNUM(N);
1491   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1492   case ISD::FCEIL:              return visitFCEIL(N);
1493   case ISD::FTRUNC:             return visitFTRUNC(N);
1494   case ISD::BRCOND:             return visitBRCOND(N);
1495   case ISD::BR_CC:              return visitBR_CC(N);
1496   case ISD::LOAD:               return visitLOAD(N);
1497   case ISD::STORE:              return visitSTORE(N);
1498   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1499   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1500   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1501   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1502   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1503   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1504   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1505   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1506   case ISD::MGATHER:            return visitMGATHER(N);
1507   case ISD::MLOAD:              return visitMLOAD(N);
1508   case ISD::MSCATTER:           return visitMSCATTER(N);
1509   case ISD::MSTORE:             return visitMSTORE(N);
1510   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1511   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1512   }
1513   return SDValue();
1514 }
1515
1516 SDValue DAGCombiner::combine(SDNode *N) {
1517   SDValue RV = visit(N);
1518
1519   // If nothing happened, try a target-specific DAG combine.
1520   if (!RV.getNode()) {
1521     assert(N->getOpcode() != ISD::DELETED_NODE &&
1522            "Node was deleted but visit returned NULL!");
1523
1524     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1525         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1526
1527       // Expose the DAG combiner to the target combiner impls.
1528       TargetLowering::DAGCombinerInfo
1529         DagCombineInfo(DAG, Level, false, this);
1530
1531       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1532     }
1533   }
1534
1535   // If nothing happened still, try promoting the operation.
1536   if (!RV.getNode()) {
1537     switch (N->getOpcode()) {
1538     default: break;
1539     case ISD::ADD:
1540     case ISD::SUB:
1541     case ISD::MUL:
1542     case ISD::AND:
1543     case ISD::OR:
1544     case ISD::XOR:
1545       RV = PromoteIntBinOp(SDValue(N, 0));
1546       break;
1547     case ISD::SHL:
1548     case ISD::SRA:
1549     case ISD::SRL:
1550       RV = PromoteIntShiftOp(SDValue(N, 0));
1551       break;
1552     case ISD::SIGN_EXTEND:
1553     case ISD::ZERO_EXTEND:
1554     case ISD::ANY_EXTEND:
1555       RV = PromoteExtend(SDValue(N, 0));
1556       break;
1557     case ISD::LOAD:
1558       if (PromoteLoad(SDValue(N, 0)))
1559         RV = SDValue(N, 0);
1560       break;
1561     }
1562   }
1563
1564   // If N is a commutative binary node, try commuting it to enable more
1565   // sdisel CSE.
1566   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1567       N->getNumValues() == 1) {
1568     SDValue N0 = N->getOperand(0);
1569     SDValue N1 = N->getOperand(1);
1570
1571     // Constant operands are canonicalized to RHS.
1572     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1573       SDValue Ops[] = {N1, N0};
1574       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1575                                             N->getFlags());
1576       if (CSENode)
1577         return SDValue(CSENode, 0);
1578     }
1579   }
1580
1581   return RV;
1582 }
1583
1584 /// Given a node, return its input chain if it has one, otherwise return a null
1585 /// sd operand.
1586 static SDValue getInputChainForNode(SDNode *N) {
1587   if (unsigned NumOps = N->getNumOperands()) {
1588     if (N->getOperand(0).getValueType() == MVT::Other)
1589       return N->getOperand(0);
1590     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1591       return N->getOperand(NumOps-1);
1592     for (unsigned i = 1; i < NumOps-1; ++i)
1593       if (N->getOperand(i).getValueType() == MVT::Other)
1594         return N->getOperand(i);
1595   }
1596   return SDValue();
1597 }
1598
1599 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1600   // If N has two operands, where one has an input chain equal to the other,
1601   // the 'other' chain is redundant.
1602   if (N->getNumOperands() == 2) {
1603     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1604       return N->getOperand(0);
1605     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1606       return N->getOperand(1);
1607   }
1608
1609   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1610   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1611   SmallPtrSet<SDNode*, 16> SeenOps;
1612   bool Changed = false;             // If we should replace this token factor.
1613
1614   // Start out with this token factor.
1615   TFs.push_back(N);
1616
1617   // Iterate through token factors.  The TFs grows when new token factors are
1618   // encountered.
1619   for (unsigned i = 0; i < TFs.size(); ++i) {
1620     SDNode *TF = TFs[i];
1621
1622     // Check each of the operands.
1623     for (const SDValue &Op : TF->op_values()) {
1624
1625       switch (Op.getOpcode()) {
1626       case ISD::EntryToken:
1627         // Entry tokens don't need to be added to the list. They are
1628         // redundant.
1629         Changed = true;
1630         break;
1631
1632       case ISD::TokenFactor:
1633         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1634           // Queue up for processing.
1635           TFs.push_back(Op.getNode());
1636           // Clean up in case the token factor is removed.
1637           AddToWorklist(Op.getNode());
1638           Changed = true;
1639           break;
1640         }
1641         LLVM_FALLTHROUGH;
1642
1643       default:
1644         // Only add if it isn't already in the list.
1645         if (SeenOps.insert(Op.getNode()).second)
1646           Ops.push_back(Op);
1647         else
1648           Changed = true;
1649         break;
1650       }
1651     }
1652   }
1653
1654   // Remove Nodes that are chained to another node in the list. Do so
1655   // by walking up chains breath-first stopping when we've seen
1656   // another operand. In general we must climb to the EntryNode, but we can exit
1657   // early if we find all remaining work is associated with just one operand as
1658   // no further pruning is possible.
1659
1660   // List of nodes to search through and original Ops from which they originate.
1661   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1662   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1663   SmallPtrSet<SDNode *, 16> SeenChains;
1664   bool DidPruneOps = false;
1665
1666   unsigned NumLeftToConsider = 0;
1667   for (const SDValue &Op : Ops) {
1668     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1669     OpWorkCount.push_back(1);
1670   }
1671
1672   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1673     // If this is an Op, we can remove the op from the list. Remark any
1674     // search associated with it as from the current OpNumber.
1675     if (SeenOps.count(Op) != 0) {
1676       Changed = true;
1677       DidPruneOps = true;
1678       unsigned OrigOpNumber = 0;
1679       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1680         OrigOpNumber++;
1681       assert((OrigOpNumber != Ops.size()) &&
1682              "expected to find TokenFactor Operand");
1683       // Re-mark worklist from OrigOpNumber to OpNumber
1684       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1685         if (Worklist[i].second == OrigOpNumber) {
1686           Worklist[i].second = OpNumber;
1687         }
1688       }
1689       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1690       OpWorkCount[OrigOpNumber] = 0;
1691       NumLeftToConsider--;
1692     }
1693     // Add if it's a new chain
1694     if (SeenChains.insert(Op).second) {
1695       OpWorkCount[OpNumber]++;
1696       Worklist.push_back(std::make_pair(Op, OpNumber));
1697     }
1698   };
1699
1700   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1701     // We need at least be consider at least 2 Ops to prune.
1702     if (NumLeftToConsider <= 1)
1703       break;
1704     auto CurNode = Worklist[i].first;
1705     auto CurOpNumber = Worklist[i].second;
1706     assert((OpWorkCount[CurOpNumber] > 0) &&
1707            "Node should not appear in worklist");
1708     switch (CurNode->getOpcode()) {
1709     case ISD::EntryToken:
1710       // Hitting EntryToken is the only way for the search to terminate without
1711       // hitting
1712       // another operand's search. Prevent us from marking this operand
1713       // considered.
1714       NumLeftToConsider++;
1715       break;
1716     case ISD::TokenFactor:
1717       for (const SDValue &Op : CurNode->op_values())
1718         AddToWorklist(i, Op.getNode(), CurOpNumber);
1719       break;
1720     case ISD::CopyFromReg:
1721     case ISD::CopyToReg:
1722       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1723       break;
1724     default:
1725       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1726         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1727       break;
1728     }
1729     OpWorkCount[CurOpNumber]--;
1730     if (OpWorkCount[CurOpNumber] == 0)
1731       NumLeftToConsider--;
1732   }
1733
1734   // If we've changed things around then replace token factor.
1735   if (Changed) {
1736     SDValue Result;
1737     if (Ops.empty()) {
1738       // The entry token is the only possible outcome.
1739       Result = DAG.getEntryNode();
1740     } else {
1741       if (DidPruneOps) {
1742         SmallVector<SDValue, 8> PrunedOps;
1743         //
1744         for (const SDValue &Op : Ops) {
1745           if (SeenChains.count(Op.getNode()) == 0)
1746             PrunedOps.push_back(Op);
1747         }
1748         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1749       } else {
1750         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1751       }
1752     }
1753     return Result;
1754   }
1755   return SDValue();
1756 }
1757
1758 /// MERGE_VALUES can always be eliminated.
1759 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1760   WorklistRemover DeadNodes(*this);
1761   // Replacing results may cause a different MERGE_VALUES to suddenly
1762   // be CSE'd with N, and carry its uses with it. Iterate until no
1763   // uses remain, to ensure that the node can be safely deleted.
1764   // First add the users of this node to the work list so that they
1765   // can be tried again once they have new operands.
1766   AddUsersToWorklist(N);
1767   do {
1768     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1769       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1770   } while (!N->use_empty());
1771   deleteAndRecombine(N);
1772   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1773 }
1774
1775 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1776 /// ConstantSDNode pointer else nullptr.
1777 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1778   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1779   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1780 }
1781
1782 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1783   auto BinOpcode = BO->getOpcode();
1784   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1785           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1786           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1787           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1788           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1789           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1790           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1791           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1792           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1793          "Unexpected binary operator");
1794
1795   // Bail out if any constants are opaque because we can't constant fold those.
1796   SDValue C1 = BO->getOperand(1);
1797   if (!isConstantOrConstantVector(C1, true) &&
1798       !isConstantFPBuildVectorOrConstantFP(C1))
1799     return SDValue();
1800
1801   // Don't do this unless the old select is going away. We want to eliminate the
1802   // binary operator, not replace a binop with a select.
1803   // TODO: Handle ISD::SELECT_CC.
1804   SDValue Sel = BO->getOperand(0);
1805   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1806     return SDValue();
1807
1808   SDValue CT = Sel.getOperand(1);
1809   if (!isConstantOrConstantVector(CT, true) &&
1810       !isConstantFPBuildVectorOrConstantFP(CT))
1811     return SDValue();
1812
1813   SDValue CF = Sel.getOperand(2);
1814   if (!isConstantOrConstantVector(CF, true) &&
1815       !isConstantFPBuildVectorOrConstantFP(CF))
1816     return SDValue();
1817
1818   // We have a select-of-constants followed by a binary operator with a
1819   // constant. Eliminate the binop by pulling the constant math into the select.
1820   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1821   EVT VT = Sel.getValueType();
1822   SDLoc DL(Sel);
1823   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1824   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1825           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1826          "Failed to constant fold a binop with constant operands");
1827
1828   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1829   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1830           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1831          "Failed to constant fold a binop with constant operands");
1832
1833   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1834 }
1835
1836 SDValue DAGCombiner::visitADD(SDNode *N) {
1837   SDValue N0 = N->getOperand(0);
1838   SDValue N1 = N->getOperand(1);
1839   EVT VT = N0.getValueType();
1840   SDLoc DL(N);
1841
1842   // fold vector ops
1843   if (VT.isVector()) {
1844     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1845       return FoldedVOp;
1846
1847     // fold (add x, 0) -> x, vector edition
1848     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1849       return N0;
1850     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1851       return N1;
1852   }
1853
1854   // fold (add x, undef) -> undef
1855   if (N0.isUndef())
1856     return N0;
1857
1858   if (N1.isUndef())
1859     return N1;
1860
1861   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1862     // canonicalize constant to RHS
1863     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1864       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1865     // fold (add c1, c2) -> c1+c2
1866     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1867                                       N1.getNode());
1868   }
1869
1870   // fold (add x, 0) -> x
1871   if (isNullConstant(N1))
1872     return N0;
1873
1874   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1875     // fold ((c1-A)+c2) -> (c1+c2)-A
1876     if (N0.getOpcode() == ISD::SUB &&
1877         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1878       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1879       return DAG.getNode(ISD::SUB, DL, VT,
1880                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1881                          N0.getOperand(1));
1882     }
1883
1884     // add (sext i1 X), 1 -> zext (not i1 X)
1885     // We don't transform this pattern:
1886     //   add (zext i1 X), -1 -> sext (not i1 X)
1887     // because most (?) targets generate better code for the zext form.
1888     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1889         isOneConstantOrOneSplatConstant(N1)) {
1890       SDValue X = N0.getOperand(0);
1891       if ((!LegalOperations ||
1892            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1893             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1894           X.getScalarValueSizeInBits() == 1) {
1895         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1896         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1897       }
1898     }
1899   }
1900
1901   if (SDValue NewSel = foldBinOpIntoSelect(N))
1902     return NewSel;
1903
1904   // reassociate add
1905   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1906     return RADD;
1907
1908   // fold ((0-A) + B) -> B-A
1909   if (N0.getOpcode() == ISD::SUB &&
1910       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1911     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1912
1913   // fold (A + (0-B)) -> A-B
1914   if (N1.getOpcode() == ISD::SUB &&
1915       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1916     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1917
1918   // fold (A+(B-A)) -> B
1919   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1920     return N1.getOperand(0);
1921
1922   // fold ((B-A)+A) -> B
1923   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1924     return N0.getOperand(0);
1925
1926   // fold (A+(B-(A+C))) to (B-C)
1927   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1928       N0 == N1.getOperand(1).getOperand(0))
1929     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1930                        N1.getOperand(1).getOperand(1));
1931
1932   // fold (A+(B-(C+A))) to (B-C)
1933   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1934       N0 == N1.getOperand(1).getOperand(1))
1935     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1936                        N1.getOperand(1).getOperand(0));
1937
1938   // fold (A+((B-A)+or-C)) to (B+or-C)
1939   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1940       N1.getOperand(0).getOpcode() == ISD::SUB &&
1941       N0 == N1.getOperand(0).getOperand(1))
1942     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1943                        N1.getOperand(1));
1944
1945   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1946   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1947     SDValue N00 = N0.getOperand(0);
1948     SDValue N01 = N0.getOperand(1);
1949     SDValue N10 = N1.getOperand(0);
1950     SDValue N11 = N1.getOperand(1);
1951
1952     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
1953       return DAG.getNode(ISD::SUB, DL, VT,
1954                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1955                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1956   }
1957
1958   if (SimplifyDemandedBits(SDValue(N, 0)))
1959     return SDValue(N, 0);
1960
1961   // fold (a+b) -> (a|b) iff a and b share no bits.
1962   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
1963       DAG.haveNoCommonBitsSet(N0, N1))
1964     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
1965
1966   if (SDValue Combined = visitADDLike(N0, N1, N))
1967     return Combined;
1968
1969   if (SDValue Combined = visitADDLike(N1, N0, N))
1970     return Combined;
1971
1972   return SDValue();
1973 }
1974
1975 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
1976   bool Masked = false;
1977
1978   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
1979   while (true) {
1980     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
1981       V = V.getOperand(0);
1982       continue;
1983     }
1984
1985     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
1986       Masked = true;
1987       V = V.getOperand(0);
1988       continue;
1989     }
1990
1991     break;
1992   }
1993
1994   // If this is not a carry, return.
1995   if (V.getResNo() != 1)
1996     return SDValue();
1997
1998   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
1999       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2000     return SDValue();
2001
2002   // If the result is masked, then no matter what kind of bool it is we can
2003   // return. If it isn't, then we need to make sure the bool type is either 0 or
2004   // 1 and not other values.
2005   if (Masked ||
2006       TLI.getBooleanContents(V.getValueType()) ==
2007           TargetLoweringBase::ZeroOrOneBooleanContent)
2008     return V;
2009
2010   return SDValue();
2011 }
2012
2013 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2014   EVT VT = N0.getValueType();
2015   SDLoc DL(LocReference);
2016
2017   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2018   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2019       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2020     return DAG.getNode(ISD::SUB, DL, VT, N0,
2021                        DAG.getNode(ISD::SHL, DL, VT,
2022                                    N1.getOperand(0).getOperand(1),
2023                                    N1.getOperand(1)));
2024
2025   if (N1.getOpcode() == ISD::AND) {
2026     SDValue AndOp0 = N1.getOperand(0);
2027     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2028     unsigned DestBits = VT.getScalarSizeInBits();
2029
2030     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2031     // and similar xforms where the inner op is either ~0 or 0.
2032     if (NumSignBits == DestBits &&
2033         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2034       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2035   }
2036
2037   // add (sext i1), X -> sub X, (zext i1)
2038   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2039       N0.getOperand(0).getValueType() == MVT::i1 &&
2040       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2041     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2042     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2043   }
2044
2045   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2046   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2047     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2048     if (TN->getVT() == MVT::i1) {
2049       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2050                                  DAG.getConstant(1, DL, VT));
2051       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2052     }
2053   }
2054
2055   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2056   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
2057     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2058                        N0, N1.getOperand(0), N1.getOperand(2));
2059
2060   // (add X, Carry) -> (addcarry X, 0, Carry)
2061   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2062     if (SDValue Carry = getAsCarry(TLI, N1))
2063       return DAG.getNode(ISD::ADDCARRY, DL,
2064                          DAG.getVTList(VT, Carry.getValueType()), N0,
2065                          DAG.getConstant(0, DL, VT), Carry);
2066
2067   return SDValue();
2068 }
2069
2070 SDValue DAGCombiner::visitADDC(SDNode *N) {
2071   SDValue N0 = N->getOperand(0);
2072   SDValue N1 = N->getOperand(1);
2073   EVT VT = N0.getValueType();
2074   SDLoc DL(N);
2075
2076   // If the flag result is dead, turn this into an ADD.
2077   if (!N->hasAnyUseOfValue(1))
2078     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2079                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2080
2081   // canonicalize constant to RHS.
2082   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2083   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2084   if (N0C && !N1C)
2085     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2086
2087   // fold (addc x, 0) -> x + no carry out
2088   if (isNullConstant(N1))
2089     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2090                                         DL, MVT::Glue));
2091
2092   // If it cannot overflow, transform into an add.
2093   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2094     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2095                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2096
2097   return SDValue();
2098 }
2099
2100 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2101   SDValue N0 = N->getOperand(0);
2102   SDValue N1 = N->getOperand(1);
2103   EVT VT = N0.getValueType();
2104   if (VT.isVector())
2105     return SDValue();
2106
2107   EVT CarryVT = N->getValueType(1);
2108   SDLoc DL(N);
2109
2110   // If the flag result is dead, turn this into an ADD.
2111   if (!N->hasAnyUseOfValue(1))
2112     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2113                      DAG.getUNDEF(CarryVT));
2114
2115   // canonicalize constant to RHS.
2116   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2117   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2118   if (N0C && !N1C)
2119     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2120
2121   // fold (uaddo x, 0) -> x + no carry out
2122   if (isNullConstant(N1))
2123     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2124
2125   // If it cannot overflow, transform into an add.
2126   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2127     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2128                      DAG.getConstant(0, DL, CarryVT));
2129
2130   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2131     return Combined;
2132
2133   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2134     return Combined;
2135
2136   return SDValue();
2137 }
2138
2139 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2140   auto VT = N0.getValueType();
2141
2142   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2143   // If Y + 1 cannot overflow.
2144   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2145     SDValue Y = N1.getOperand(0);
2146     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2147     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2148       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2149                          N1.getOperand(2));
2150   }
2151
2152   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2153   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2154     if (SDValue Carry = getAsCarry(TLI, N1))
2155       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2156                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2157
2158   return SDValue();
2159 }
2160
2161 SDValue DAGCombiner::visitADDE(SDNode *N) {
2162   SDValue N0 = N->getOperand(0);
2163   SDValue N1 = N->getOperand(1);
2164   SDValue CarryIn = N->getOperand(2);
2165
2166   // canonicalize constant to RHS
2167   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2168   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2169   if (N0C && !N1C)
2170     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2171                        N1, N0, CarryIn);
2172
2173   // fold (adde x, y, false) -> (addc x, y)
2174   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2175     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2176
2177   return SDValue();
2178 }
2179
2180 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2181   SDValue N0 = N->getOperand(0);
2182   SDValue N1 = N->getOperand(1);
2183   SDValue CarryIn = N->getOperand(2);
2184   SDLoc DL(N);
2185
2186   // canonicalize constant to RHS
2187   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2188   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2189   if (N0C && !N1C)
2190     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2191
2192   // fold (addcarry x, y, false) -> (uaddo x, y)
2193   if (isNullConstant(CarryIn))
2194     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2195
2196   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2197   if (isNullConstant(N0) && isNullConstant(N1)) {
2198     EVT VT = N0.getValueType();
2199     EVT CarryVT = CarryIn.getValueType();
2200     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2201     AddToWorklist(CarryExt.getNode());
2202     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2203                                     DAG.getConstant(1, DL, VT)),
2204                      DAG.getConstant(0, DL, CarryVT));
2205   }
2206
2207   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2208     return Combined;
2209
2210   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2211     return Combined;
2212
2213   return SDValue();
2214 }
2215
2216 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2217                                        SDNode *N) {
2218   // Iff the flag result is dead:
2219   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2220   if ((N0.getOpcode() == ISD::ADD ||
2221        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2222       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2223     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2224                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2225
2226   /**
2227    * When one of the addcarry argument is itself a carry, we may be facing
2228    * a diamond carry propagation. In which case we try to transform the DAG
2229    * to ensure linear carry propagation if that is possible.
2230    *
2231    * We are trying to get:
2232    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2233    */
2234   if (auto Y = getAsCarry(TLI, N1)) {
2235     /**
2236      *            (uaddo A, B)
2237      *             /       \
2238      *          Carry      Sum
2239      *            |          \
2240      *            | (addcarry *, 0, Z)
2241      *            |       /
2242      *             \   Carry
2243      *              |   /
2244      * (addcarry X, *, *)
2245      */
2246     if (Y.getOpcode() == ISD::UADDO &&
2247         CarryIn.getResNo() == 1 &&
2248         CarryIn.getOpcode() == ISD::ADDCARRY &&
2249         isNullConstant(CarryIn.getOperand(1)) &&
2250         CarryIn.getOperand(0) == Y.getValue(0)) {
2251       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2252                               Y.getOperand(0), Y.getOperand(1),
2253                               CarryIn.getOperand(2));
2254       AddToWorklist(NewY.getNode());
2255       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2256                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2257                          NewY.getValue(1));
2258     }
2259   }
2260
2261   return SDValue();
2262 }
2263
2264 // Since it may not be valid to emit a fold to zero for vector initializers
2265 // check if we can before folding.
2266 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2267                              SelectionDAG &DAG, bool LegalOperations,
2268                              bool LegalTypes) {
2269   if (!VT.isVector())
2270     return DAG.getConstant(0, DL, VT);
2271   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2272     return DAG.getConstant(0, DL, VT);
2273   return SDValue();
2274 }
2275
2276 SDValue DAGCombiner::visitSUB(SDNode *N) {
2277   SDValue N0 = N->getOperand(0);
2278   SDValue N1 = N->getOperand(1);
2279   EVT VT = N0.getValueType();
2280   SDLoc DL(N);
2281
2282   // fold vector ops
2283   if (VT.isVector()) {
2284     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2285       return FoldedVOp;
2286
2287     // fold (sub x, 0) -> x, vector edition
2288     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2289       return N0;
2290   }
2291
2292   // fold (sub x, x) -> 0
2293   // FIXME: Refactor this and xor and other similar operations together.
2294   if (N0 == N1)
2295     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2296   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2297       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2298     // fold (sub c1, c2) -> c1-c2
2299     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2300                                       N1.getNode());
2301   }
2302
2303   if (SDValue NewSel = foldBinOpIntoSelect(N))
2304     return NewSel;
2305
2306   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2307
2308   // fold (sub x, c) -> (add x, -c)
2309   if (N1C) {
2310     return DAG.getNode(ISD::ADD, DL, VT, N0,
2311                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2312   }
2313
2314   if (isNullConstantOrNullSplatConstant(N0)) {
2315     unsigned BitWidth = VT.getScalarSizeInBits();
2316     // Right-shifting everything out but the sign bit followed by negation is
2317     // the same as flipping arithmetic/logical shift type without the negation:
2318     // -(X >>u 31) -> (X >>s 31)
2319     // -(X >>s 31) -> (X >>u 31)
2320     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2321       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2322       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2323         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2324         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2325           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2326       }
2327     }
2328
2329     // 0 - X --> 0 if the sub is NUW.
2330     if (N->getFlags().hasNoUnsignedWrap())
2331       return N0;
2332
2333     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2334       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2335       // N1 must be 0 because negating the minimum signed value is undefined.
2336       if (N->getFlags().hasNoSignedWrap())
2337         return N0;
2338
2339       // 0 - X --> X if X is 0 or the minimum signed value.
2340       return N1;
2341     }
2342   }
2343
2344   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2345   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2346     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2347
2348   // fold A-(A-B) -> B
2349   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2350     return N1.getOperand(1);
2351
2352   // fold (A+B)-A -> B
2353   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2354     return N0.getOperand(1);
2355
2356   // fold (A+B)-B -> A
2357   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2358     return N0.getOperand(0);
2359
2360   // fold C2-(A+C1) -> (C2-C1)-A
2361   if (N1.getOpcode() == ISD::ADD) {
2362     SDValue N11 = N1.getOperand(1);
2363     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2364         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2365       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2366       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2367     }
2368   }
2369
2370   // fold ((A+(B+or-C))-B) -> A+or-C
2371   if (N0.getOpcode() == ISD::ADD &&
2372       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2373        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2374       N0.getOperand(1).getOperand(0) == N1)
2375     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2376                        N0.getOperand(1).getOperand(1));
2377
2378   // fold ((A+(C+B))-B) -> A+C
2379   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2380       N0.getOperand(1).getOperand(1) == N1)
2381     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2382                        N0.getOperand(1).getOperand(0));
2383
2384   // fold ((A-(B-C))-C) -> A-B
2385   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2386       N0.getOperand(1).getOperand(1) == N1)
2387     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2388                        N0.getOperand(1).getOperand(0));
2389
2390   // If either operand of a sub is undef, the result is undef
2391   if (N0.isUndef())
2392     return N0;
2393   if (N1.isUndef())
2394     return N1;
2395
2396   // If the relocation model supports it, consider symbol offsets.
2397   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2398     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2399       // fold (sub Sym, c) -> Sym-c
2400       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2401         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2402                                     GA->getOffset() -
2403                                         (uint64_t)N1C->getSExtValue());
2404       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2405       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2406         if (GA->getGlobal() == GB->getGlobal())
2407           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2408                                  DL, VT);
2409     }
2410
2411   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2412   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2413     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2414     if (TN->getVT() == MVT::i1) {
2415       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2416                                  DAG.getConstant(1, DL, VT));
2417       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2418     }
2419   }
2420
2421   return SDValue();
2422 }
2423
2424 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2425   SDValue N0 = N->getOperand(0);
2426   SDValue N1 = N->getOperand(1);
2427   EVT VT = N0.getValueType();
2428   SDLoc DL(N);
2429
2430   // If the flag result is dead, turn this into an SUB.
2431   if (!N->hasAnyUseOfValue(1))
2432     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2433                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2434
2435   // fold (subc x, x) -> 0 + no borrow
2436   if (N0 == N1)
2437     return CombineTo(N, DAG.getConstant(0, DL, VT),
2438                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2439
2440   // fold (subc x, 0) -> x + no borrow
2441   if (isNullConstant(N1))
2442     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2443
2444   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2445   if (isAllOnesConstant(N0))
2446     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2447                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2448
2449   return SDValue();
2450 }
2451
2452 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2453   SDValue N0 = N->getOperand(0);
2454   SDValue N1 = N->getOperand(1);
2455   EVT VT = N0.getValueType();
2456   if (VT.isVector())
2457     return SDValue();
2458
2459   EVT CarryVT = N->getValueType(1);
2460   SDLoc DL(N);
2461
2462   // If the flag result is dead, turn this into an SUB.
2463   if (!N->hasAnyUseOfValue(1))
2464     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2465                      DAG.getUNDEF(CarryVT));
2466
2467   // fold (usubo x, x) -> 0 + no borrow
2468   if (N0 == N1)
2469     return CombineTo(N, DAG.getConstant(0, DL, VT),
2470                      DAG.getConstant(0, DL, CarryVT));
2471
2472   // fold (usubo x, 0) -> x + no borrow
2473   if (isNullConstant(N1))
2474     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2475
2476   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2477   if (isAllOnesConstant(N0))
2478     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2479                      DAG.getConstant(0, DL, CarryVT));
2480
2481   return SDValue();
2482 }
2483
2484 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2485   SDValue N0 = N->getOperand(0);
2486   SDValue N1 = N->getOperand(1);
2487   SDValue CarryIn = N->getOperand(2);
2488
2489   // fold (sube x, y, false) -> (subc x, y)
2490   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2491     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2492
2493   return SDValue();
2494 }
2495
2496 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2497   SDValue N0 = N->getOperand(0);
2498   SDValue N1 = N->getOperand(1);
2499   SDValue CarryIn = N->getOperand(2);
2500
2501   // fold (subcarry x, y, false) -> (usubo x, y)
2502   if (isNullConstant(CarryIn))
2503     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2504
2505   return SDValue();
2506 }
2507
2508 SDValue DAGCombiner::visitMUL(SDNode *N) {
2509   SDValue N0 = N->getOperand(0);
2510   SDValue N1 = N->getOperand(1);
2511   EVT VT = N0.getValueType();
2512
2513   // fold (mul x, undef) -> 0
2514   if (N0.isUndef() || N1.isUndef())
2515     return DAG.getConstant(0, SDLoc(N), VT);
2516
2517   bool N0IsConst = false;
2518   bool N1IsConst = false;
2519   bool N1IsOpaqueConst = false;
2520   bool N0IsOpaqueConst = false;
2521   APInt ConstValue0, ConstValue1;
2522   // fold vector ops
2523   if (VT.isVector()) {
2524     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2525       return FoldedVOp;
2526
2527     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2528     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2529   } else {
2530     N0IsConst = isa<ConstantSDNode>(N0);
2531     if (N0IsConst) {
2532       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2533       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2534     }
2535     N1IsConst = isa<ConstantSDNode>(N1);
2536     if (N1IsConst) {
2537       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2538       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2539     }
2540   }
2541
2542   // fold (mul c1, c2) -> c1*c2
2543   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2544     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2545                                       N0.getNode(), N1.getNode());
2546
2547   // canonicalize constant to RHS (vector doesn't have to splat)
2548   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2549      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2550     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2551   // fold (mul x, 0) -> 0
2552   if (N1IsConst && ConstValue1 == 0)
2553     return N1;
2554   // We require a splat of the entire scalar bit width for non-contiguous
2555   // bit patterns.
2556   bool IsFullSplat =
2557     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2558   // fold (mul x, 1) -> x
2559   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2560     return N0;
2561
2562   if (SDValue NewSel = foldBinOpIntoSelect(N))
2563     return NewSel;
2564
2565   // fold (mul x, -1) -> 0-x
2566   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2567     SDLoc DL(N);
2568     return DAG.getNode(ISD::SUB, DL, VT,
2569                        DAG.getConstant(0, DL, VT), N0);
2570   }
2571   // fold (mul x, (1 << c)) -> x << c
2572   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2573       IsFullSplat) {
2574     SDLoc DL(N);
2575     return DAG.getNode(ISD::SHL, DL, VT, N0,
2576                        DAG.getConstant(ConstValue1.logBase2(), DL,
2577                                        getShiftAmountTy(N0.getValueType())));
2578   }
2579   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2580   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2581       IsFullSplat) {
2582     unsigned Log2Val = (-ConstValue1).logBase2();
2583     SDLoc DL(N);
2584     // FIXME: If the input is something that is easily negated (e.g. a
2585     // single-use add), we should put the negate there.
2586     return DAG.getNode(ISD::SUB, DL, VT,
2587                        DAG.getConstant(0, DL, VT),
2588                        DAG.getNode(ISD::SHL, DL, VT, N0,
2589                             DAG.getConstant(Log2Val, DL,
2590                                       getShiftAmountTy(N0.getValueType()))));
2591   }
2592
2593   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2594   if (N0.getOpcode() == ISD::SHL &&
2595       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2596       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2597     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2598     if (isConstantOrConstantVector(C3))
2599       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2600   }
2601
2602   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2603   // use.
2604   {
2605     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2606
2607     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2608     if (N0.getOpcode() == ISD::SHL &&
2609         isConstantOrConstantVector(N0.getOperand(1)) &&
2610         N0.getNode()->hasOneUse()) {
2611       Sh = N0; Y = N1;
2612     } else if (N1.getOpcode() == ISD::SHL &&
2613                isConstantOrConstantVector(N1.getOperand(1)) &&
2614                N1.getNode()->hasOneUse()) {
2615       Sh = N1; Y = N0;
2616     }
2617
2618     if (Sh.getNode()) {
2619       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2620       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2621     }
2622   }
2623
2624   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2625   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2626       N0.getOpcode() == ISD::ADD &&
2627       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2628       isMulAddWithConstProfitable(N, N0, N1))
2629       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2630                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2631                                      N0.getOperand(0), N1),
2632                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2633                                      N0.getOperand(1), N1));
2634
2635   // reassociate mul
2636   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2637     return RMUL;
2638
2639   return SDValue();
2640 }
2641
2642 /// Return true if divmod libcall is available.
2643 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2644                                      const TargetLowering &TLI) {
2645   RTLIB::Libcall LC;
2646   EVT NodeType = Node->getValueType(0);
2647   if (!NodeType.isSimple())
2648     return false;
2649   switch (NodeType.getSimpleVT().SimpleTy) {
2650   default: return false; // No libcall for vector types.
2651   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2652   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2653   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2654   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2655   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2656   }
2657
2658   return TLI.getLibcallName(LC) != nullptr;
2659 }
2660
2661 /// Issue divrem if both quotient and remainder are needed.
2662 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2663   if (Node->use_empty())
2664     return SDValue(); // This is a dead node, leave it alone.
2665
2666   unsigned Opcode = Node->getOpcode();
2667   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2668   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2669
2670   // DivMod lib calls can still work on non-legal types if using lib-calls.
2671   EVT VT = Node->getValueType(0);
2672   if (VT.isVector() || !VT.isInteger())
2673     return SDValue();
2674
2675   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2676     return SDValue();
2677
2678   // If DIVREM is going to get expanded into a libcall,
2679   // but there is no libcall available, then don't combine.
2680   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2681       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2682     return SDValue();
2683
2684   // If div is legal, it's better to do the normal expansion
2685   unsigned OtherOpcode = 0;
2686   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2687     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2688     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2689       return SDValue();
2690   } else {
2691     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2692     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2693       return SDValue();
2694   }
2695
2696   SDValue Op0 = Node->getOperand(0);
2697   SDValue Op1 = Node->getOperand(1);
2698   SDValue combined;
2699   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2700          UE = Op0.getNode()->use_end(); UI != UE;) {
2701     SDNode *User = *UI++;
2702     if (User == Node || User->use_empty())
2703       continue;
2704     // Convert the other matching node(s), too;
2705     // otherwise, the DIVREM may get target-legalized into something
2706     // target-specific that we won't be able to recognize.
2707     unsigned UserOpc = User->getOpcode();
2708     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2709         User->getOperand(0) == Op0 &&
2710         User->getOperand(1) == Op1) {
2711       if (!combined) {
2712         if (UserOpc == OtherOpcode) {
2713           SDVTList VTs = DAG.getVTList(VT, VT);
2714           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2715         } else if (UserOpc == DivRemOpc) {
2716           combined = SDValue(User, 0);
2717         } else {
2718           assert(UserOpc == Opcode);
2719           continue;
2720         }
2721       }
2722       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2723         CombineTo(User, combined);
2724       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2725         CombineTo(User, combined.getValue(1));
2726     }
2727   }
2728   return combined;
2729 }
2730
2731 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2732   SDValue N0 = N->getOperand(0);
2733   SDValue N1 = N->getOperand(1);
2734   EVT VT = N->getValueType(0);
2735   SDLoc DL(N);
2736
2737   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2738     return DAG.getUNDEF(VT);
2739
2740   // undef / X -> 0
2741   // undef % X -> 0
2742   if (N0.isUndef())
2743     return DAG.getConstant(0, DL, VT);
2744
2745   return SDValue();
2746 }
2747
2748 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2749   SDValue N0 = N->getOperand(0);
2750   SDValue N1 = N->getOperand(1);
2751   EVT VT = N->getValueType(0);
2752
2753   // fold vector ops
2754   if (VT.isVector())
2755     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2756       return FoldedVOp;
2757
2758   SDLoc DL(N);
2759
2760   // fold (sdiv c1, c2) -> c1/c2
2761   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2762   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2763   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2764     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2765   // fold (sdiv X, 1) -> X
2766   if (N1C && N1C->isOne())
2767     return N0;
2768   // fold (sdiv X, -1) -> 0-X
2769   if (N1C && N1C->isAllOnesValue())
2770     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2771
2772   if (SDValue V = simplifyDivRem(N, DAG))
2773     return V;
2774
2775   if (SDValue NewSel = foldBinOpIntoSelect(N))
2776     return NewSel;
2777
2778   // If we know the sign bits of both operands are zero, strength reduce to a
2779   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2780   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2781     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2782
2783   // fold (sdiv X, pow2) -> simple ops after legalize
2784   // FIXME: We check for the exact bit here because the generic lowering gives
2785   // better results in that case. The target-specific lowering should learn how
2786   // to handle exact sdivs efficiently.
2787   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2788       !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2789                                     (-N1C->getAPIntValue()).isPowerOf2())) {
2790     // Target-specific implementation of sdiv x, pow2.
2791     if (SDValue Res = BuildSDIVPow2(N))
2792       return Res;
2793
2794     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2795
2796     // Splat the sign bit into the register
2797     SDValue SGN =
2798         DAG.getNode(ISD::SRA, DL, VT, N0,
2799                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2800                                     getShiftAmountTy(N0.getValueType())));
2801     AddToWorklist(SGN.getNode());
2802
2803     // Add (N0 < 0) ? abs2 - 1 : 0;
2804     SDValue SRL =
2805         DAG.getNode(ISD::SRL, DL, VT, SGN,
2806                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2807                                     getShiftAmountTy(SGN.getValueType())));
2808     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2809     AddToWorklist(SRL.getNode());
2810     AddToWorklist(ADD.getNode());    // Divide by pow2
2811     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2812                   DAG.getConstant(lg2, DL,
2813                                   getShiftAmountTy(ADD.getValueType())));
2814
2815     // If we're dividing by a positive value, we're done.  Otherwise, we must
2816     // negate the result.
2817     if (N1C->getAPIntValue().isNonNegative())
2818       return SRA;
2819
2820     AddToWorklist(SRA.getNode());
2821     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2822   }
2823
2824   // If integer divide is expensive and we satisfy the requirements, emit an
2825   // alternate sequence.  Targets may check function attributes for size/speed
2826   // trade-offs.
2827   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2828   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2829     if (SDValue Op = BuildSDIV(N))
2830       return Op;
2831
2832   // sdiv, srem -> sdivrem
2833   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2834   // true.  Otherwise, we break the simplification logic in visitREM().
2835   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2836     if (SDValue DivRem = useDivRem(N))
2837         return DivRem;
2838
2839   return SDValue();
2840 }
2841
2842 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2843   SDValue N0 = N->getOperand(0);
2844   SDValue N1 = N->getOperand(1);
2845   EVT VT = N->getValueType(0);
2846
2847   // fold vector ops
2848   if (VT.isVector())
2849     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2850       return FoldedVOp;
2851
2852   SDLoc DL(N);
2853
2854   // fold (udiv c1, c2) -> c1/c2
2855   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2856   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2857   if (N0C && N1C)
2858     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2859                                                     N0C, N1C))
2860       return Folded;
2861
2862   if (SDValue V = simplifyDivRem(N, DAG))
2863     return V;
2864
2865   if (SDValue NewSel = foldBinOpIntoSelect(N))
2866     return NewSel;
2867
2868   // fold (udiv x, (1 << c)) -> x >>u c
2869   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2870       DAG.isKnownToBeAPowerOfTwo(N1)) {
2871     SDValue LogBase2 = BuildLogBase2(N1, DL);
2872     AddToWorklist(LogBase2.getNode());
2873
2874     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2875     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2876     AddToWorklist(Trunc.getNode());
2877     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2878   }
2879
2880   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2881   if (N1.getOpcode() == ISD::SHL) {
2882     SDValue N10 = N1.getOperand(0);
2883     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2884         DAG.isKnownToBeAPowerOfTwo(N10)) {
2885       SDValue LogBase2 = BuildLogBase2(N10, DL);
2886       AddToWorklist(LogBase2.getNode());
2887
2888       EVT ADDVT = N1.getOperand(1).getValueType();
2889       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2890       AddToWorklist(Trunc.getNode());
2891       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2892       AddToWorklist(Add.getNode());
2893       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2894     }
2895   }
2896
2897   // fold (udiv x, c) -> alternate
2898   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2899   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2900     if (SDValue Op = BuildUDIV(N))
2901       return Op;
2902
2903   // sdiv, srem -> sdivrem
2904   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2905   // true.  Otherwise, we break the simplification logic in visitREM().
2906   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2907     if (SDValue DivRem = useDivRem(N))
2908         return DivRem;
2909
2910   return SDValue();
2911 }
2912
2913 // handles ISD::SREM and ISD::UREM
2914 SDValue DAGCombiner::visitREM(SDNode *N) {
2915   unsigned Opcode = N->getOpcode();
2916   SDValue N0 = N->getOperand(0);
2917   SDValue N1 = N->getOperand(1);
2918   EVT VT = N->getValueType(0);
2919   bool isSigned = (Opcode == ISD::SREM);
2920   SDLoc DL(N);
2921
2922   // fold (rem c1, c2) -> c1%c2
2923   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2924   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2925   if (N0C && N1C)
2926     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2927       return Folded;
2928
2929   if (SDValue V = simplifyDivRem(N, DAG))
2930     return V;
2931
2932   if (SDValue NewSel = foldBinOpIntoSelect(N))
2933     return NewSel;
2934
2935   if (isSigned) {
2936     // If we know the sign bits of both operands are zero, strength reduce to a
2937     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2938     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2939       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2940   } else {
2941     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
2942     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
2943       // fold (urem x, pow2) -> (and x, pow2-1)
2944       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2945       AddToWorklist(Add.getNode());
2946       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2947     }
2948     if (N1.getOpcode() == ISD::SHL &&
2949         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
2950       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2951       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2952       AddToWorklist(Add.getNode());
2953       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2954     }
2955   }
2956
2957   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2958
2959   // If X/C can be simplified by the division-by-constant logic, lower
2960   // X%C to the equivalent of X-X/C*C.
2961   // To avoid mangling nodes, this simplification requires that the combine()
2962   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
2963   // against this by skipping the simplification if isIntDivCheap().  When
2964   // div is not cheap, combine will not return a DIVREM.  Regardless,
2965   // checking cheapness here makes sense since the simplification results in
2966   // fatter code.
2967   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2968     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2969     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2970     AddToWorklist(Div.getNode());
2971     SDValue OptimizedDiv = combine(Div.getNode());
2972     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2973       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2974              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2975       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2976       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2977       AddToWorklist(Mul.getNode());
2978       return Sub;
2979     }
2980   }
2981
2982   // sdiv, srem -> sdivrem
2983   if (SDValue DivRem = useDivRem(N))
2984     return DivRem.getValue(1);
2985
2986   return SDValue();
2987 }
2988
2989 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2990   SDValue N0 = N->getOperand(0);
2991   SDValue N1 = N->getOperand(1);
2992   EVT VT = N->getValueType(0);
2993   SDLoc DL(N);
2994
2995   // fold (mulhs x, 0) -> 0
2996   if (isNullConstant(N1))
2997     return N1;
2998   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2999   if (isOneConstant(N1)) {
3000     SDLoc DL(N);
3001     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3002                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3003                                        getShiftAmountTy(N0.getValueType())));
3004   }
3005   // fold (mulhs x, undef) -> 0
3006   if (N0.isUndef() || N1.isUndef())
3007     return DAG.getConstant(0, SDLoc(N), VT);
3008
3009   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3010   // plus a shift.
3011   if (VT.isSimple() && !VT.isVector()) {
3012     MVT Simple = VT.getSimpleVT();
3013     unsigned SimpleSize = Simple.getSizeInBits();
3014     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3015     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3016       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3017       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3018       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3019       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3020             DAG.getConstant(SimpleSize, DL,
3021                             getShiftAmountTy(N1.getValueType())));
3022       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3023     }
3024   }
3025
3026   return SDValue();
3027 }
3028
3029 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3030   SDValue N0 = N->getOperand(0);
3031   SDValue N1 = N->getOperand(1);
3032   EVT VT = N->getValueType(0);
3033   SDLoc DL(N);
3034
3035   // fold (mulhu x, 0) -> 0
3036   if (isNullConstant(N1))
3037     return N1;
3038   // fold (mulhu x, 1) -> 0
3039   if (isOneConstant(N1))
3040     return DAG.getConstant(0, DL, N0.getValueType());
3041   // fold (mulhu x, undef) -> 0
3042   if (N0.isUndef() || N1.isUndef())
3043     return DAG.getConstant(0, DL, VT);
3044
3045   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3046   // plus a shift.
3047   if (VT.isSimple() && !VT.isVector()) {
3048     MVT Simple = VT.getSimpleVT();
3049     unsigned SimpleSize = Simple.getSizeInBits();
3050     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3051     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3052       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3053       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3054       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3055       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3056             DAG.getConstant(SimpleSize, DL,
3057                             getShiftAmountTy(N1.getValueType())));
3058       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3059     }
3060   }
3061
3062   return SDValue();
3063 }
3064
3065 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3066 /// give the opcodes for the two computations that are being performed. Return
3067 /// true if a simplification was made.
3068 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3069                                                 unsigned HiOp) {
3070   // If the high half is not needed, just compute the low half.
3071   bool HiExists = N->hasAnyUseOfValue(1);
3072   if (!HiExists &&
3073       (!LegalOperations ||
3074        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3075     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3076     return CombineTo(N, Res, Res);
3077   }
3078
3079   // If the low half is not needed, just compute the high half.
3080   bool LoExists = N->hasAnyUseOfValue(0);
3081   if (!LoExists &&
3082       (!LegalOperations ||
3083        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3084     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3085     return CombineTo(N, Res, Res);
3086   }
3087
3088   // If both halves are used, return as it is.
3089   if (LoExists && HiExists)
3090     return SDValue();
3091
3092   // If the two computed results can be simplified separately, separate them.
3093   if (LoExists) {
3094     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3095     AddToWorklist(Lo.getNode());
3096     SDValue LoOpt = combine(Lo.getNode());
3097     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3098         (!LegalOperations ||
3099          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3100       return CombineTo(N, LoOpt, LoOpt);
3101   }
3102
3103   if (HiExists) {
3104     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3105     AddToWorklist(Hi.getNode());
3106     SDValue HiOpt = combine(Hi.getNode());
3107     if (HiOpt.getNode() && HiOpt != Hi &&
3108         (!LegalOperations ||
3109          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3110       return CombineTo(N, HiOpt, HiOpt);
3111   }
3112
3113   return SDValue();
3114 }
3115
3116 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3117   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3118     return Res;
3119
3120   EVT VT = N->getValueType(0);
3121   SDLoc DL(N);
3122
3123   // If the type is twice as wide is legal, transform the mulhu to a wider
3124   // multiply plus a shift.
3125   if (VT.isSimple() && !VT.isVector()) {
3126     MVT Simple = VT.getSimpleVT();
3127     unsigned SimpleSize = Simple.getSizeInBits();
3128     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3129     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3130       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3131       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3132       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3133       // Compute the high part as N1.
3134       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3135             DAG.getConstant(SimpleSize, DL,
3136                             getShiftAmountTy(Lo.getValueType())));
3137       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3138       // Compute the low part as N0.
3139       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3140       return CombineTo(N, Lo, Hi);
3141     }
3142   }
3143
3144   return SDValue();
3145 }
3146
3147 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3148   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3149     return Res;
3150
3151   EVT VT = N->getValueType(0);
3152   SDLoc DL(N);
3153
3154   // If the type is twice as wide is legal, transform the mulhu to a wider
3155   // multiply plus a shift.
3156   if (VT.isSimple() && !VT.isVector()) {
3157     MVT Simple = VT.getSimpleVT();
3158     unsigned SimpleSize = Simple.getSizeInBits();
3159     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3160     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3161       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3162       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3163       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3164       // Compute the high part as N1.
3165       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3166             DAG.getConstant(SimpleSize, DL,
3167                             getShiftAmountTy(Lo.getValueType())));
3168       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3169       // Compute the low part as N0.
3170       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3171       return CombineTo(N, Lo, Hi);
3172     }
3173   }
3174
3175   return SDValue();
3176 }
3177
3178 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3179   // (smulo x, 2) -> (saddo x, x)
3180   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3181     if (C2->getAPIntValue() == 2)
3182       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3183                          N->getOperand(0), N->getOperand(0));
3184
3185   return SDValue();
3186 }
3187
3188 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3189   // (umulo x, 2) -> (uaddo x, x)
3190   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3191     if (C2->getAPIntValue() == 2)
3192       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3193                          N->getOperand(0), N->getOperand(0));
3194
3195   return SDValue();
3196 }
3197
3198 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3199   SDValue N0 = N->getOperand(0);
3200   SDValue N1 = N->getOperand(1);
3201   EVT VT = N0.getValueType();
3202
3203   // fold vector ops
3204   if (VT.isVector())
3205     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3206       return FoldedVOp;
3207
3208   // fold (add c1, c2) -> c1+c2
3209   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3210   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3211   if (N0C && N1C)
3212     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3213
3214   // canonicalize constant to RHS
3215   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3216      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3217     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3218
3219   return SDValue();
3220 }
3221
3222 /// If this is a binary operator with two operands of the same opcode, try to
3223 /// simplify it.
3224 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3225   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3226   EVT VT = N0.getValueType();
3227   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3228
3229   // Bail early if none of these transforms apply.
3230   if (N0.getNumOperands() == 0) return SDValue();
3231
3232   // For each of OP in AND/OR/XOR:
3233   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3234   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3235   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3236   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3237   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3238   //
3239   // do not sink logical op inside of a vector extend, since it may combine
3240   // into a vsetcc.
3241   EVT Op0VT = N0.getOperand(0).getValueType();
3242   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3243        N0.getOpcode() == ISD::SIGN_EXTEND ||
3244        N0.getOpcode() == ISD::BSWAP ||
3245        // Avoid infinite looping with PromoteIntBinOp.
3246        (N0.getOpcode() == ISD::ANY_EXTEND &&
3247         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3248        (N0.getOpcode() == ISD::TRUNCATE &&
3249         (!TLI.isZExtFree(VT, Op0VT) ||
3250          !TLI.isTruncateFree(Op0VT, VT)) &&
3251         TLI.isTypeLegal(Op0VT))) &&
3252       !VT.isVector() &&
3253       Op0VT == N1.getOperand(0).getValueType() &&
3254       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3255     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3256                                  N0.getOperand(0).getValueType(),
3257                                  N0.getOperand(0), N1.getOperand(0));
3258     AddToWorklist(ORNode.getNode());
3259     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3260   }
3261
3262   // For each of OP in SHL/SRL/SRA/AND...
3263   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3264   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3265   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3266   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3267        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3268       N0.getOperand(1) == N1.getOperand(1)) {
3269     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3270                                  N0.getOperand(0).getValueType(),
3271                                  N0.getOperand(0), N1.getOperand(0));
3272     AddToWorklist(ORNode.getNode());
3273     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3274                        ORNode, N0.getOperand(1));
3275   }
3276
3277   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3278   // Only perform this optimization up until type legalization, before
3279   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3280   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3281   // we don't want to undo this promotion.
3282   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3283   // on scalars.
3284   if ((N0.getOpcode() == ISD::BITCAST ||
3285        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3286        Level <= AfterLegalizeTypes) {
3287     SDValue In0 = N0.getOperand(0);
3288     SDValue In1 = N1.getOperand(0);
3289     EVT In0Ty = In0.getValueType();
3290     EVT In1Ty = In1.getValueType();
3291     SDLoc DL(N);
3292     // If both incoming values are integers, and the original types are the
3293     // same.
3294     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3295       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3296       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3297       AddToWorklist(Op.getNode());
3298       return BC;
3299     }
3300   }
3301
3302   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3303   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3304   // If both shuffles use the same mask, and both shuffle within a single
3305   // vector, then it is worthwhile to move the swizzle after the operation.
3306   // The type-legalizer generates this pattern when loading illegal
3307   // vector types from memory. In many cases this allows additional shuffle
3308   // optimizations.
3309   // There are other cases where moving the shuffle after the xor/and/or
3310   // is profitable even if shuffles don't perform a swizzle.
3311   // If both shuffles use the same mask, and both shuffles have the same first
3312   // or second operand, then it might still be profitable to move the shuffle
3313   // after the xor/and/or operation.
3314   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3315     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3316     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3317
3318     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3319            "Inputs to shuffles are not the same type");
3320
3321     // Check that both shuffles use the same mask. The masks are known to be of
3322     // the same length because the result vector type is the same.
3323     // Check also that shuffles have only one use to avoid introducing extra
3324     // instructions.
3325     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3326         SVN0->getMask().equals(SVN1->getMask())) {
3327       SDValue ShOp = N0->getOperand(1);
3328
3329       // Don't try to fold this node if it requires introducing a
3330       // build vector of all zeros that might be illegal at this stage.
3331       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3332         if (!LegalTypes)
3333           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3334         else
3335           ShOp = SDValue();
3336       }
3337
3338       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3339       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
3340       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3341       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3342         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3343                                       N0->getOperand(0), N1->getOperand(0));
3344         AddToWorklist(NewNode.getNode());
3345         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3346                                     SVN0->getMask());
3347       }
3348
3349       // Don't try to fold this node if it requires introducing a
3350       // build vector of all zeros that might be illegal at this stage.
3351       ShOp = N0->getOperand(0);
3352       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3353         if (!LegalTypes)
3354           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3355         else
3356           ShOp = SDValue();
3357       }
3358
3359       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3360       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
3361       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3362       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3363         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3364                                       N0->getOperand(1), N1->getOperand(1));
3365         AddToWorklist(NewNode.getNode());
3366         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3367                                     SVN0->getMask());
3368       }
3369     }
3370   }
3371
3372   return SDValue();
3373 }
3374
3375 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3376 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3377                                        const SDLoc &DL) {
3378   SDValue LL, LR, RL, RR, N0CC, N1CC;
3379   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3380       !isSetCCEquivalent(N1, RL, RR, N1CC))
3381     return SDValue();
3382
3383   assert(N0.getValueType() == N1.getValueType() &&
3384          "Unexpected operand types for bitwise logic op");
3385   assert(LL.getValueType() == LR.getValueType() &&
3386          RL.getValueType() == RR.getValueType() &&
3387          "Unexpected operand types for setcc");
3388
3389   // If we're here post-legalization or the logic op type is not i1, the logic
3390   // op type must match a setcc result type. Also, all folds require new
3391   // operations on the left and right operands, so those types must match.
3392   EVT VT = N0.getValueType();
3393   EVT OpVT = LL.getValueType();
3394   if (LegalOperations || VT != MVT::i1)
3395     if (VT != getSetCCResultType(OpVT))
3396       return SDValue();
3397   if (OpVT != RL.getValueType())
3398     return SDValue();
3399
3400   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3401   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3402   bool IsInteger = OpVT.isInteger();
3403   if (LR == RR && CC0 == CC1 && IsInteger) {
3404     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3405     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3406
3407     // All bits clear?
3408     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3409     // All sign bits clear?
3410     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3411     // Any bits set?
3412     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3413     // Any sign bits set?
3414     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3415
3416     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3417     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3418     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3419     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3420     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3421       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3422       AddToWorklist(Or.getNode());
3423       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3424     }
3425
3426     // All bits set?
3427     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3428     // All sign bits set?
3429     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3430     // Any bits clear?
3431     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3432     // Any sign bits clear?
3433     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3434
3435     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3436     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3437     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3438     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
3439     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3440       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3441       AddToWorklist(And.getNode());
3442       return DAG.getSetCC(DL, VT, And, LR, CC1);
3443     }
3444   }
3445
3446   // TODO: What is the 'or' equivalent of this fold?
3447   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3448   if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
3449       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3450        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3451     SDValue One = DAG.getConstant(1, DL, OpVT);
3452     SDValue Two = DAG.getConstant(2, DL, OpVT);
3453     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3454     AddToWorklist(Add.getNode());
3455     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3456   }
3457
3458   // Try more general transforms if the predicates match and the only user of
3459   // the compares is the 'and' or 'or'.
3460   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3461       N0.hasOneUse() && N1.hasOneUse()) {
3462     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3463     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3464     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3465       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3466       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3467       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3468       SDValue Zero = DAG.getConstant(0, DL, OpVT);
3469       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3470     }
3471   }
3472
3473   // Canonicalize equivalent operands to LL == RL.
3474   if (LL == RR && LR == RL) {
3475     CC1 = ISD::getSetCCSwappedOperands(CC1);
3476     std::swap(RL, RR);
3477   }
3478
3479   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3480   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3481   if (LL == RL && LR == RR) {
3482     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3483                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3484     if (NewCC != ISD::SETCC_INVALID &&
3485         (!LegalOperations ||
3486          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3487           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3488       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3489   }
3490
3491   return SDValue();
3492 }
3493
3494 /// This contains all DAGCombine rules which reduce two values combined by
3495 /// an And operation to a single value. This makes them reusable in the context
3496 /// of visitSELECT(). Rules involving constants are not included as
3497 /// visitSELECT() already handles those cases.
3498 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3499   EVT VT = N1.getValueType();
3500   SDLoc DL(N);
3501
3502   // fold (and x, undef) -> 0
3503   if (N0.isUndef() || N1.isUndef())
3504     return DAG.getConstant(0, DL, VT);
3505
3506   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3507     return V;
3508
3509   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3510       VT.getSizeInBits() <= 64) {
3511     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3512       APInt ADDC = ADDI->getAPIntValue();
3513       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3514         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3515         // immediate for an add, but it is legal if its top c2 bits are set,
3516         // transform the ADD so the immediate doesn't need to be materialized
3517         // in a register.
3518         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3519           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3520                                              SRLI->getZExtValue());
3521           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3522             ADDC |= Mask;
3523             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3524               SDLoc DL0(N0);
3525               SDValue NewAdd =
3526                 DAG.getNode(ISD::ADD, DL0, VT,
3527                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3528               CombineTo(N0.getNode(), NewAdd);
3529               // Return N so it doesn't get rechecked!
3530               return SDValue(N, 0);
3531             }
3532           }
3533         }
3534       }
3535     }
3536   }
3537
3538   // Reduce bit extract of low half of an integer to the narrower type.
3539   // (and (srl i64:x, K), KMask) ->
3540   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3541   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3542     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3543       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3544         unsigned Size = VT.getSizeInBits();
3545         const APInt &AndMask = CAnd->getAPIntValue();
3546         unsigned ShiftBits = CShift->getZExtValue();
3547
3548         // Bail out, this node will probably disappear anyway.
3549         if (ShiftBits == 0)
3550           return SDValue();
3551
3552         unsigned MaskBits = AndMask.countTrailingOnes();
3553         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3554
3555         if (AndMask.isMask() &&
3556             // Required bits must not span the two halves of the integer and
3557             // must fit in the half size type.
3558             (ShiftBits + MaskBits <= Size / 2) &&
3559             TLI.isNarrowingProfitable(VT, HalfVT) &&
3560             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3561             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3562             TLI.isTruncateFree(VT, HalfVT) &&
3563             TLI.isZExtFree(HalfVT, VT)) {
3564           // The isNarrowingProfitable is to avoid regressions on PPC and
3565           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3566           // on downstream users of this. Those patterns could probably be
3567           // extended to handle extensions mixed in.
3568
3569           SDValue SL(N0);
3570           assert(MaskBits <= Size);
3571
3572           // Extracting the highest bit of the low half.
3573           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3574           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3575                                       N0.getOperand(0));
3576
3577           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3578           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3579           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3580           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3581           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3582         }
3583       }
3584     }
3585   }
3586
3587   return SDValue();
3588 }
3589
3590 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3591                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3592                                    bool &NarrowLoad) {
3593   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3594
3595   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3596     return false;
3597
3598   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3599   LoadedVT = LoadN->getMemoryVT();
3600
3601   if (ExtVT == LoadedVT &&
3602       (!LegalOperations ||
3603        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3604     // ZEXTLOAD will match without needing to change the size of the value being
3605     // loaded.
3606     NarrowLoad = false;
3607     return true;
3608   }
3609
3610   // Do not change the width of a volatile load.
3611   if (LoadN->isVolatile())
3612     return false;
3613
3614   // Do not generate loads of non-round integer types since these can
3615   // be expensive (and would be wrong if the type is not byte sized).
3616   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3617     return false;
3618
3619   if (LegalOperations &&
3620       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3621     return false;
3622
3623   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3624     return false;
3625
3626   NarrowLoad = true;
3627   return true;
3628 }
3629
3630 SDValue DAGCombiner::visitAND(SDNode *N) {
3631   SDValue N0 = N->getOperand(0);
3632   SDValue N1 = N->getOperand(1);
3633   EVT VT = N1.getValueType();
3634
3635   // x & x --> x
3636   if (N0 == N1)
3637     return N0;
3638
3639   // fold vector ops
3640   if (VT.isVector()) {
3641     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3642       return FoldedVOp;
3643
3644     // fold (and x, 0) -> 0, vector edition
3645     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3646       // do not return N0, because undef node may exist in N0
3647       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3648                              SDLoc(N), N0.getValueType());
3649     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3650       // do not return N1, because undef node may exist in N1
3651       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3652                              SDLoc(N), N1.getValueType());
3653
3654     // fold (and x, -1) -> x, vector edition
3655     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3656       return N1;
3657     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3658       return N0;
3659   }
3660
3661   // fold (and c1, c2) -> c1&c2
3662   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3663   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3664   if (N0C && N1C && !N1C->isOpaque())
3665     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3666   // canonicalize constant to RHS
3667   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3668      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3669     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3670   // fold (and x, -1) -> x
3671   if (isAllOnesConstant(N1))
3672     return N0;
3673   // if (and x, c) is known to be zero, return 0
3674   unsigned BitWidth = VT.getScalarSizeInBits();
3675   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3676                                    APInt::getAllOnesValue(BitWidth)))
3677     return DAG.getConstant(0, SDLoc(N), VT);
3678
3679   if (SDValue NewSel = foldBinOpIntoSelect(N))
3680     return NewSel;
3681
3682   // reassociate and
3683   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3684     return RAND;
3685   // fold (and (or x, C), D) -> D if (C & D) == D
3686   if (N1C && N0.getOpcode() == ISD::OR)
3687     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3688       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
3689         return N1;
3690   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3691   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3692     SDValue N0Op0 = N0.getOperand(0);
3693     APInt Mask = ~N1C->getAPIntValue();
3694     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3695     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3696       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3697                                  N0.getValueType(), N0Op0);
3698
3699       // Replace uses of the AND with uses of the Zero extend node.
3700       CombineTo(N, Zext);
3701
3702       // We actually want to replace all uses of the any_extend with the
3703       // zero_extend, to avoid duplicating things.  This will later cause this
3704       // AND to be folded.
3705       CombineTo(N0.getNode(), Zext);
3706       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3707     }
3708   }
3709   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3710   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3711   // already be zero by virtue of the width of the base type of the load.
3712   //
3713   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3714   // more cases.
3715   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3716        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3717        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3718        N0.getOperand(0).getResNo() == 0) ||
3719       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3720     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3721                                          N0 : N0.getOperand(0) );
3722
3723     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3724     // This can be a pure constant or a vector splat, in which case we treat the
3725     // vector as a scalar and use the splat value.
3726     APInt Constant = APInt::getNullValue(1);
3727     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3728       Constant = C->getAPIntValue();
3729     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3730       APInt SplatValue, SplatUndef;
3731       unsigned SplatBitSize;
3732       bool HasAnyUndefs;
3733       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3734                                              SplatBitSize, HasAnyUndefs);
3735       if (IsSplat) {
3736         // Undef bits can contribute to a possible optimisation if set, so
3737         // set them.
3738         SplatValue |= SplatUndef;
3739
3740         // The splat value may be something like "0x00FFFFFF", which means 0 for
3741         // the first vector value and FF for the rest, repeating. We need a mask
3742         // that will apply equally to all members of the vector, so AND all the
3743         // lanes of the constant together.
3744         EVT VT = Vector->getValueType(0);
3745         unsigned BitWidth = VT.getScalarSizeInBits();
3746
3747         // If the splat value has been compressed to a bitlength lower
3748         // than the size of the vector lane, we need to re-expand it to
3749         // the lane size.
3750         if (BitWidth > SplatBitSize)
3751           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3752                SplatBitSize < BitWidth;
3753                SplatBitSize = SplatBitSize * 2)
3754             SplatValue |= SplatValue.shl(SplatBitSize);
3755
3756         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3757         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3758         if (SplatBitSize % BitWidth == 0) {
3759           Constant = APInt::getAllOnesValue(BitWidth);
3760           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3761             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3762         }
3763       }
3764     }
3765
3766     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3767     // actually legal and isn't going to get expanded, else this is a false
3768     // optimisation.
3769     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3770                                                     Load->getValueType(0),
3771                                                     Load->getMemoryVT());
3772
3773     // Resize the constant to the same size as the original memory access before
3774     // extension. If it is still the AllOnesValue then this AND is completely
3775     // unneeded.
3776     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3777
3778     bool B;
3779     switch (Load->getExtensionType()) {
3780     default: B = false; break;
3781     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3782     case ISD::ZEXTLOAD:
3783     case ISD::NON_EXTLOAD: B = true; break;
3784     }
3785
3786     if (B && Constant.isAllOnesValue()) {
3787       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3788       // preserve semantics once we get rid of the AND.
3789       SDValue NewLoad(Load, 0);
3790
3791       // Fold the AND away. NewLoad may get replaced immediately.
3792       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3793
3794       if (Load->getExtensionType() == ISD::EXTLOAD) {
3795         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3796                               Load->getValueType(0), SDLoc(Load),
3797                               Load->getChain(), Load->getBasePtr(),
3798                               Load->getOffset(), Load->getMemoryVT(),
3799                               Load->getMemOperand());
3800         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3801         if (Load->getNumValues() == 3) {
3802           // PRE/POST_INC loads have 3 values.
3803           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3804                            NewLoad.getValue(2) };
3805           CombineTo(Load, To, 3, true);
3806         } else {
3807           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3808         }
3809       }
3810
3811       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3812     }
3813   }
3814
3815   // fold (and (load x), 255) -> (zextload x, i8)
3816   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3817   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3818   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3819                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3820                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3821     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3822     LoadSDNode *LN0 = HasAnyExt
3823       ? cast<LoadSDNode>(N0.getOperand(0))
3824       : cast<LoadSDNode>(N0);
3825     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3826         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3827       auto NarrowLoad = false;
3828       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3829       EVT ExtVT, LoadedVT;
3830       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3831                            NarrowLoad)) {
3832         if (!NarrowLoad) {
3833           SDValue NewLoad =
3834             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3835                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3836                            LN0->getMemOperand());
3837           AddToWorklist(N);
3838           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3839           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3840         } else {
3841           EVT PtrType = LN0->getOperand(1).getValueType();
3842
3843           unsigned Alignment = LN0->getAlignment();
3844           SDValue NewPtr = LN0->getBasePtr();
3845
3846           // For big endian targets, we need to add an offset to the pointer
3847           // to load the correct bytes.  For little endian systems, we merely
3848           // need to read fewer bytes from the same pointer.
3849           if (DAG.getDataLayout().isBigEndian()) {
3850             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3851             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3852             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3853             SDLoc DL(LN0);
3854             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3855                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3856             Alignment = MinAlign(Alignment, PtrOff);
3857           }
3858
3859           AddToWorklist(NewPtr.getNode());
3860
3861           SDValue Load = DAG.getExtLoad(
3862               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3863               LN0->getPointerInfo(), ExtVT, Alignment,
3864               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3865           AddToWorklist(N);
3866           CombineTo(LN0, Load, Load.getValue(1));
3867           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3868         }
3869       }
3870     }
3871   }
3872
3873   if (SDValue Combined = visitANDLike(N0, N1, N))
3874     return Combined;
3875
3876   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3877   if (N0.getOpcode() == N1.getOpcode())
3878     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3879       return Tmp;
3880
3881   // Masking the negated extension of a boolean is just the zero-extended
3882   // boolean:
3883   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3884   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3885   //
3886   // Note: the SimplifyDemandedBits fold below can make an information-losing
3887   // transform, and then we have no way to find this better fold.
3888   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3889     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3890     SDValue SubRHS = N0.getOperand(1);
3891     if (SubLHS && SubLHS->isNullValue()) {
3892       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3893           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3894         return SubRHS;
3895       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3896           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3897         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3898     }
3899   }
3900
3901   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3902   // fold (and (sra)) -> (and (srl)) when possible.
3903   if (SimplifyDemandedBits(SDValue(N, 0)))
3904     return SDValue(N, 0);
3905
3906   // fold (zext_inreg (extload x)) -> (zextload x)
3907   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3908     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3909     EVT MemVT = LN0->getMemoryVT();
3910     // If we zero all the possible extended bits, then we can turn this into
3911     // a zextload if we are running before legalize or the operation is legal.
3912     unsigned BitWidth = N1.getScalarValueSizeInBits();
3913     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3914                            BitWidth - MemVT.getScalarSizeInBits())) &&
3915         ((!LegalOperations && !LN0->isVolatile()) ||
3916          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3917       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3918                                        LN0->getChain(), LN0->getBasePtr(),
3919                                        MemVT, LN0->getMemOperand());
3920       AddToWorklist(N);
3921       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3922       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3923     }
3924   }
3925   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3926   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3927       N0.hasOneUse()) {
3928     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3929     EVT MemVT = LN0->getMemoryVT();
3930     // If we zero all the possible extended bits, then we can turn this into
3931     // a zextload if we are running before legalize or the operation is legal.
3932     unsigned BitWidth = N1.getScalarValueSizeInBits();
3933     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3934                            BitWidth - MemVT.getScalarSizeInBits())) &&
3935         ((!LegalOperations && !LN0->isVolatile()) ||
3936          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3937       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3938                                        LN0->getChain(), LN0->getBasePtr(),
3939                                        MemVT, LN0->getMemOperand());
3940       AddToWorklist(N);
3941       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3942       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3943     }
3944   }
3945   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3946   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3947     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3948                                            N0.getOperand(1), false))
3949       return BSwap;
3950   }
3951
3952   return SDValue();
3953 }
3954
3955 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3956 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3957                                         bool DemandHighBits) {
3958   if (!LegalOperations)
3959     return SDValue();
3960
3961   EVT VT = N->getValueType(0);
3962   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3963     return SDValue();
3964   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
3965     return SDValue();
3966
3967   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3968   bool LookPassAnd0 = false;
3969   bool LookPassAnd1 = false;
3970   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3971       std::swap(N0, N1);
3972   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3973       std::swap(N0, N1);
3974   if (N0.getOpcode() == ISD::AND) {
3975     if (!N0.getNode()->hasOneUse())
3976       return SDValue();
3977     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3978     if (!N01C || N01C->getZExtValue() != 0xFF00)
3979       return SDValue();
3980     N0 = N0.getOperand(0);
3981     LookPassAnd0 = true;
3982   }
3983
3984   if (N1.getOpcode() == ISD::AND) {
3985     if (!N1.getNode()->hasOneUse())
3986       return SDValue();
3987     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3988     if (!N11C || N11C->getZExtValue() != 0xFF)
3989       return SDValue();
3990     N1 = N1.getOperand(0);
3991     LookPassAnd1 = true;
3992   }
3993
3994   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3995     std::swap(N0, N1);
3996   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3997     return SDValue();
3998   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
3999     return SDValue();
4000
4001   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4002   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4003   if (!N01C || !N11C)
4004     return SDValue();
4005   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4006     return SDValue();
4007
4008   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4009   SDValue N00 = N0->getOperand(0);
4010   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4011     if (!N00.getNode()->hasOneUse())
4012       return SDValue();
4013     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4014     if (!N001C || N001C->getZExtValue() != 0xFF)
4015       return SDValue();
4016     N00 = N00.getOperand(0);
4017     LookPassAnd0 = true;
4018   }
4019
4020   SDValue N10 = N1->getOperand(0);
4021   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4022     if (!N10.getNode()->hasOneUse())
4023       return SDValue();
4024     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4025     if (!N101C || N101C->getZExtValue() != 0xFF00)
4026       return SDValue();
4027     N10 = N10.getOperand(0);
4028     LookPassAnd1 = true;
4029   }
4030
4031   if (N00 != N10)
4032     return SDValue();
4033
4034   // Make sure everything beyond the low halfword gets set to zero since the SRL
4035   // 16 will clear the top bits.
4036   unsigned OpSizeInBits = VT.getSizeInBits();
4037   if (DemandHighBits && OpSizeInBits > 16) {
4038     // If the left-shift isn't masked out then the only way this is a bswap is
4039     // if all bits beyond the low 8 are 0. In that case the entire pattern
4040     // reduces to a left shift anyway: leave it for other parts of the combiner.
4041     if (!LookPassAnd0)
4042       return SDValue();
4043
4044     // However, if the right shift isn't masked out then it might be because
4045     // it's not needed. See if we can spot that too.
4046     if (!LookPassAnd1 &&
4047         !DAG.MaskedValueIsZero(
4048             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4049       return SDValue();
4050   }
4051
4052   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4053   if (OpSizeInBits > 16) {
4054     SDLoc DL(N);
4055     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4056                       DAG.getConstant(OpSizeInBits - 16, DL,
4057                                       getShiftAmountTy(VT)));
4058   }
4059   return Res;
4060 }
4061
4062 /// Return true if the specified node is an element that makes up a 32-bit
4063 /// packed halfword byteswap.
4064 /// ((x & 0x000000ff) << 8) |
4065 /// ((x & 0x0000ff00) >> 8) |
4066 /// ((x & 0x00ff0000) << 8) |
4067 /// ((x & 0xff000000) >> 8)
4068 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4069   if (!N.getNode()->hasOneUse())
4070     return false;
4071
4072   unsigned Opc = N.getOpcode();
4073   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4074     return false;
4075
4076   SDValue N0 = N.getOperand(0);
4077   unsigned Opc0 = N0.getOpcode();
4078   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4079     return false;
4080
4081   ConstantSDNode *N1C = nullptr;
4082   // SHL or SRL: look upstream for AND mask operand
4083   if (Opc == ISD::AND)
4084     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4085   else if (Opc0 == ISD::AND)
4086     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4087   if (!N1C)
4088     return false;
4089
4090   unsigned MaskByteOffset;
4091   switch (N1C->getZExtValue()) {
4092   default:
4093     return false;
4094   case 0xFF:       MaskByteOffset = 0; break;
4095   case 0xFF00:     MaskByteOffset = 1; break;
4096   case 0xFF0000:   MaskByteOffset = 2; break;
4097   case 0xFF000000: MaskByteOffset = 3; break;
4098   }
4099
4100   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4101   if (Opc == ISD::AND) {
4102     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4103       // (x >> 8) & 0xff
4104       // (x >> 8) & 0xff0000
4105       if (Opc0 != ISD::SRL)
4106         return false;
4107       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4108       if (!C || C->getZExtValue() != 8)
4109         return false;
4110     } else {
4111       // (x << 8) & 0xff00
4112       // (x << 8) & 0xff000000
4113       if (Opc0 != ISD::SHL)
4114         return false;
4115       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4116       if (!C || C->getZExtValue() != 8)
4117         return false;
4118     }
4119   } else if (Opc == ISD::SHL) {
4120     // (x & 0xff) << 8
4121     // (x & 0xff0000) << 8
4122     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4123       return false;
4124     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4125     if (!C || C->getZExtValue() != 8)
4126       return false;
4127   } else { // Opc == ISD::SRL
4128     // (x & 0xff00) >> 8
4129     // (x & 0xff000000) >> 8
4130     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4131       return false;
4132     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4133     if (!C || C->getZExtValue() != 8)
4134       return false;
4135   }
4136
4137   if (Parts[MaskByteOffset])
4138     return false;
4139
4140   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4141   return true;
4142 }
4143
4144 /// Match a 32-bit packed halfword bswap. That is
4145 /// ((x & 0x000000ff) << 8) |
4146 /// ((x & 0x0000ff00) >> 8) |
4147 /// ((x & 0x00ff0000) << 8) |
4148 /// ((x & 0xff000000) >> 8)
4149 /// => (rotl (bswap x), 16)
4150 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4151   if (!LegalOperations)
4152     return SDValue();
4153
4154   EVT VT = N->getValueType(0);
4155   if (VT != MVT::i32)
4156     return SDValue();
4157   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4158     return SDValue();
4159
4160   // Look for either
4161   // (or (or (and), (and)), (or (and), (and)))
4162   // (or (or (or (and), (and)), (and)), (and))
4163   if (N0.getOpcode() != ISD::OR)
4164     return SDValue();
4165   SDValue N00 = N0.getOperand(0);
4166   SDValue N01 = N0.getOperand(1);
4167   SDNode *Parts[4] = {};
4168
4169   if (N1.getOpcode() == ISD::OR &&
4170       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4171     // (or (or (and), (and)), (or (and), (and)))
4172     if (!isBSwapHWordElement(N00, Parts))
4173       return SDValue();
4174
4175     if (!isBSwapHWordElement(N01, Parts))
4176       return SDValue();
4177     SDValue N10 = N1.getOperand(0);
4178     if (!isBSwapHWordElement(N10, Parts))
4179       return SDValue();
4180     SDValue N11 = N1.getOperand(1);
4181     if (!isBSwapHWordElement(N11, Parts))
4182       return SDValue();
4183   } else {
4184     // (or (or (or (and), (and)), (and)), (and))
4185     if (!isBSwapHWordElement(N1, Parts))
4186       return SDValue();
4187     if (!isBSwapHWordElement(N01, Parts))
4188       return SDValue();
4189     if (N00.getOpcode() != ISD::OR)
4190       return SDValue();
4191     SDValue N000 = N00.getOperand(0);
4192     if (!isBSwapHWordElement(N000, Parts))
4193       return SDValue();
4194     SDValue N001 = N00.getOperand(1);
4195     if (!isBSwapHWordElement(N001, Parts))
4196       return SDValue();
4197   }
4198
4199   // Make sure the parts are all coming from the same node.
4200   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4201     return SDValue();
4202
4203   SDLoc DL(N);
4204   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4205                               SDValue(Parts[0], 0));
4206
4207   // Result of the bswap should be rotated by 16. If it's not legal, then
4208   // do  (x << 16) | (x >> 16).
4209   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4210   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4211     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4212   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4213     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4214   return DAG.getNode(ISD::OR, DL, VT,
4215                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4216                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4217 }
4218
4219 /// This contains all DAGCombine rules which reduce two values combined by
4220 /// an Or operation to a single value \see visitANDLike().
4221 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4222   EVT VT = N1.getValueType();
4223   SDLoc DL(N);
4224
4225   // fold (or x, undef) -> -1
4226   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4227     return DAG.getAllOnesConstant(DL, VT);
4228
4229   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4230     return V;
4231
4232   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4233   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4234       // Don't increase # computations.
4235       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4236     // We can only do this xform if we know that bits from X that are set in C2
4237     // but not in C1 are already zero.  Likewise for Y.
4238     if (const ConstantSDNode *N0O1C =
4239         getAsNonOpaqueConstant(N0.getOperand(1))) {
4240       if (const ConstantSDNode *N1O1C =
4241           getAsNonOpaqueConstant(N1.getOperand(1))) {
4242         // We can only do this xform if we know that bits from X that are set in
4243         // C2 but not in C1 are already zero.  Likewise for Y.
4244         const APInt &LHSMask = N0O1C->getAPIntValue();
4245         const APInt &RHSMask = N1O1C->getAPIntValue();
4246
4247         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4248             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4249           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4250                                   N0.getOperand(0), N1.getOperand(0));
4251           return DAG.getNode(ISD::AND, DL, VT, X,
4252                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4253         }
4254       }
4255     }
4256   }
4257
4258   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4259   if (N0.getOpcode() == ISD::AND &&
4260       N1.getOpcode() == ISD::AND &&
4261       N0.getOperand(0) == N1.getOperand(0) &&
4262       // Don't increase # computations.
4263       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4264     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4265                             N0.getOperand(1), N1.getOperand(1));
4266     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4267   }
4268
4269   return SDValue();
4270 }
4271
4272 SDValue DAGCombiner::visitOR(SDNode *N) {
4273   SDValue N0 = N->getOperand(0);
4274   SDValue N1 = N->getOperand(1);
4275   EVT VT = N1.getValueType();
4276
4277   // x | x --> x
4278   if (N0 == N1)
4279     return N0;
4280
4281   // fold vector ops
4282   if (VT.isVector()) {
4283     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4284       return FoldedVOp;
4285
4286     // fold (or x, 0) -> x, vector edition
4287     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4288       return N1;
4289     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4290       return N0;
4291
4292     // fold (or x, -1) -> -1, vector edition
4293     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4294       // do not return N0, because undef node may exist in N0
4295       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4296     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4297       // do not return N1, because undef node may exist in N1
4298       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4299
4300     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4301     // Do this only if the resulting shuffle is legal.
4302     if (isa<ShuffleVectorSDNode>(N0) &&
4303         isa<ShuffleVectorSDNode>(N1) &&
4304         // Avoid folding a node with illegal type.
4305         TLI.isTypeLegal(VT)) {
4306       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4307       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4308       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4309       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4310       // Ensure both shuffles have a zero input.
4311       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4312         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4313         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4314         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4315         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4316         bool CanFold = true;
4317         int NumElts = VT.getVectorNumElements();
4318         SmallVector<int, 4> Mask(NumElts);
4319
4320         for (int i = 0; i != NumElts; ++i) {
4321           int M0 = SV0->getMaskElt(i);
4322           int M1 = SV1->getMaskElt(i);
4323
4324           // Determine if either index is pointing to a zero vector.
4325           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4326           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4327
4328           // If one element is zero and the otherside is undef, keep undef.
4329           // This also handles the case that both are undef.
4330           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4331             Mask[i] = -1;
4332             continue;
4333           }
4334
4335           // Make sure only one of the elements is zero.
4336           if (M0Zero == M1Zero) {
4337             CanFold = false;
4338             break;
4339           }
4340
4341           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4342
4343           // We have a zero and non-zero element. If the non-zero came from
4344           // SV0 make the index a LHS index. If it came from SV1, make it
4345           // a RHS index. We need to mod by NumElts because we don't care
4346           // which operand it came from in the original shuffles.
4347           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4348         }
4349
4350         if (CanFold) {
4351           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4352           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4353
4354           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4355           if (!LegalMask) {
4356             std::swap(NewLHS, NewRHS);
4357             ShuffleVectorSDNode::commuteMask(Mask);
4358             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4359           }
4360
4361           if (LegalMask)
4362             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4363         }
4364       }
4365     }
4366   }
4367
4368   // fold (or c1, c2) -> c1|c2
4369   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4370   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4371   if (N0C && N1C && !N1C->isOpaque())
4372     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4373   // canonicalize constant to RHS
4374   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4375      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4376     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4377   // fold (or x, 0) -> x
4378   if (isNullConstant(N1))
4379     return N0;
4380   // fold (or x, -1) -> -1
4381   if (isAllOnesConstant(N1))
4382     return N1;
4383
4384   if (SDValue NewSel = foldBinOpIntoSelect(N))
4385     return NewSel;
4386
4387   // fold (or x, c) -> c iff (x & ~c) == 0
4388   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4389     return N1;
4390
4391   if (SDValue Combined = visitORLike(N0, N1, N))
4392     return Combined;
4393
4394   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4395   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4396     return BSwap;
4397   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4398     return BSwap;
4399
4400   // reassociate or
4401   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4402     return ROR;
4403
4404   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4405   // iff (c1 & c2) != 0.
4406   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
4407     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4408       if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
4409         if (SDValue COR =
4410                 DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
4411           return DAG.getNode(
4412               ISD::AND, SDLoc(N), VT,
4413               DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4414         return SDValue();
4415       }
4416     }
4417   }
4418
4419   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4420   if (N0.getOpcode() == N1.getOpcode())
4421     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4422       return Tmp;
4423
4424   // See if this is some rotate idiom.
4425   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4426     return SDValue(Rot, 0);
4427
4428   if (SDValue Load = MatchLoadCombine(N))
4429     return Load;
4430
4431   // Simplify the operands using demanded-bits information.
4432   if (SimplifyDemandedBits(SDValue(N, 0)))
4433     return SDValue(N, 0);
4434
4435   return SDValue();
4436 }
4437
4438 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4439 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4440   if (Op.getOpcode() == ISD::AND) {
4441     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4442       Mask = Op.getOperand(1);
4443       Op = Op.getOperand(0);
4444     } else {
4445       return false;
4446     }
4447   }
4448
4449   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4450     Shift = Op;
4451     return true;
4452   }
4453
4454   return false;
4455 }
4456
4457 // Return true if we can prove that, whenever Neg and Pos are both in the
4458 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4459 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4460 //
4461 //     (or (shift1 X, Neg), (shift2 X, Pos))
4462 //
4463 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4464 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4465 // to consider shift amounts with defined behavior.
4466 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4467   // If EltSize is a power of 2 then:
4468   //
4469   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4470   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4471   //
4472   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4473   // for the stronger condition:
4474   //
4475   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4476   //
4477   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4478   // we can just replace Neg with Neg' for the rest of the function.
4479   //
4480   // In other cases we check for the even stronger condition:
4481   //
4482   //     Neg == EltSize - Pos                                    [B]
4483   //
4484   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4485   // behavior if Pos == 0 (and consequently Neg == EltSize).
4486   //
4487   // We could actually use [A] whenever EltSize is a power of 2, but the
4488   // only extra cases that it would match are those uninteresting ones
4489   // where Neg and Pos are never in range at the same time.  E.g. for
4490   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4491   // as well as (sub 32, Pos), but:
4492   //
4493   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4494   //
4495   // always invokes undefined behavior for 32-bit X.
4496   //
4497   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4498   unsigned MaskLoBits = 0;
4499   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4500     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4501       if (NegC->getAPIntValue() == EltSize - 1) {
4502         Neg = Neg.getOperand(0);
4503         MaskLoBits = Log2_64(EltSize);
4504       }
4505     }
4506   }
4507
4508   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4509   if (Neg.getOpcode() != ISD::SUB)
4510     return false;
4511   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4512   if (!NegC)
4513     return false;
4514   SDValue NegOp1 = Neg.getOperand(1);
4515
4516   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4517   // Pos'.  The truncation is redundant for the purpose of the equality.
4518   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4519     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4520       if (PosC->getAPIntValue() == EltSize - 1)
4521         Pos = Pos.getOperand(0);
4522
4523   // The condition we need is now:
4524   //
4525   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4526   //
4527   // If NegOp1 == Pos then we need:
4528   //
4529   //              EltSize & Mask == NegC & Mask
4530   //
4531   // (because "x & Mask" is a truncation and distributes through subtraction).
4532   APInt Width;
4533   if (Pos == NegOp1)
4534     Width = NegC->getAPIntValue();
4535
4536   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4537   // Then the condition we want to prove becomes:
4538   //
4539   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4540   //
4541   // which, again because "x & Mask" is a truncation, becomes:
4542   //
4543   //                NegC & Mask == (EltSize - PosC) & Mask
4544   //             EltSize & Mask == (NegC + PosC) & Mask
4545   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4546     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4547       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4548     else
4549       return false;
4550   } else
4551     return false;
4552
4553   // Now we just need to check that EltSize & Mask == Width & Mask.
4554   if (MaskLoBits)
4555     // EltSize & Mask is 0 since Mask is EltSize - 1.
4556     return Width.getLoBits(MaskLoBits) == 0;
4557   return Width == EltSize;
4558 }
4559
4560 // A subroutine of MatchRotate used once we have found an OR of two opposite
4561 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4562 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4563 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4564 // Neg with outer conversions stripped away.
4565 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4566                                        SDValue Neg, SDValue InnerPos,
4567                                        SDValue InnerNeg, unsigned PosOpcode,
4568                                        unsigned NegOpcode, const SDLoc &DL) {
4569   // fold (or (shl x, (*ext y)),
4570   //          (srl x, (*ext (sub 32, y)))) ->
4571   //   (rotl x, y) or (rotr x, (sub 32, y))
4572   //
4573   // fold (or (shl x, (*ext (sub 32, y))),
4574   //          (srl x, (*ext y))) ->
4575   //   (rotr x, y) or (rotl x, (sub 32, y))
4576   EVT VT = Shifted.getValueType();
4577   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4578     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4579     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4580                        HasPos ? Pos : Neg).getNode();
4581   }
4582
4583   return nullptr;
4584 }
4585
4586 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4587 // idioms for rotate, and if the target supports rotation instructions, generate
4588 // a rot[lr].
4589 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4590   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4591   EVT VT = LHS.getValueType();
4592   if (!TLI.isTypeLegal(VT)) return nullptr;
4593
4594   // The target must have at least one rotate flavor.
4595   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4596   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4597   if (!HasROTL && !HasROTR) return nullptr;
4598
4599   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4600   SDValue LHSShift;   // The shift.
4601   SDValue LHSMask;    // AND value if any.
4602   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4603     return nullptr; // Not part of a rotate.
4604
4605   SDValue RHSShift;   // The shift.
4606   SDValue RHSMask;    // AND value if any.
4607   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4608     return nullptr; // Not part of a rotate.
4609
4610   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4611     return nullptr;   // Not shifting the same value.
4612
4613   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4614     return nullptr;   // Shifts must disagree.
4615
4616   // Canonicalize shl to left side in a shl/srl pair.
4617   if (RHSShift.getOpcode() == ISD::SHL) {
4618     std::swap(LHS, RHS);
4619     std::swap(LHSShift, RHSShift);
4620     std::swap(LHSMask, RHSMask);
4621   }
4622
4623   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4624   SDValue LHSShiftArg = LHSShift.getOperand(0);
4625   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4626   SDValue RHSShiftArg = RHSShift.getOperand(0);
4627   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4628
4629   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4630   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4631   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4632     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4633     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4634     if ((LShVal + RShVal) != EltSizeInBits)
4635       return nullptr;
4636
4637     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4638                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4639
4640     // If there is an AND of either shifted operand, apply it to the result.
4641     if (LHSMask.getNode() || RHSMask.getNode()) {
4642       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
4643
4644       if (LHSMask.getNode()) {
4645         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4646         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4647                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
4648                                        DAG.getConstant(RHSBits, DL, VT)));
4649       }
4650       if (RHSMask.getNode()) {
4651         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4652         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4653                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
4654                                        DAG.getConstant(LHSBits, DL, VT)));
4655       }
4656
4657       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4658     }
4659
4660     return Rot.getNode();
4661   }
4662
4663   // If there is a mask here, and we have a variable shift, we can't be sure
4664   // that we're masking out the right stuff.
4665   if (LHSMask.getNode() || RHSMask.getNode())
4666     return nullptr;
4667
4668   // If the shift amount is sign/zext/any-extended just peel it off.
4669   SDValue LExtOp0 = LHSShiftAmt;
4670   SDValue RExtOp0 = RHSShiftAmt;
4671   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4672        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4673        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4674        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4675       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4676        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4677        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4678        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4679     LExtOp0 = LHSShiftAmt.getOperand(0);
4680     RExtOp0 = RHSShiftAmt.getOperand(0);
4681   }
4682
4683   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4684                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4685   if (TryL)
4686     return TryL;
4687
4688   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4689                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4690   if (TryR)
4691     return TryR;
4692
4693   return nullptr;
4694 }
4695
4696 namespace {
4697 /// Helper struct to parse and store a memory address as base + index + offset.
4698 /// We ignore sign extensions when it is safe to do so.
4699 /// The following two expressions are not equivalent. To differentiate we need
4700 /// to store whether there was a sign extension involved in the index
4701 /// computation.
4702 ///  (load (i64 add (i64 copyfromreg %c)
4703 ///                 (i64 signextend (add (i8 load %index)
4704 ///                                      (i8 1))))
4705 /// vs
4706 ///
4707 /// (load (i64 add (i64 copyfromreg %c)
4708 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
4709 ///                                         (i32 1)))))
4710 struct BaseIndexOffset {
4711   SDValue Base;
4712   SDValue Index;
4713   int64_t Offset;
4714   bool IsIndexSignExt;
4715
4716   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
4717
4718   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
4719                   bool IsIndexSignExt) :
4720     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
4721
4722   bool equalBaseIndex(const BaseIndexOffset &Other) {
4723     return Other.Base == Base && Other.Index == Index &&
4724       Other.IsIndexSignExt == IsIndexSignExt;
4725   }
4726
4727   /// Parses tree in Ptr for base, index, offset addresses.
4728   static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
4729                                int64_t PartialOffset = 0) {
4730     bool IsIndexSignExt = false;
4731
4732     // Split up a folded GlobalAddress+Offset into its component parts.
4733     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
4734       if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
4735         return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
4736                                                     SDLoc(GA),
4737                                                     GA->getValueType(0),
4738                                                     /*Offset=*/PartialOffset,
4739                                                     /*isTargetGA=*/false,
4740                                                     GA->getTargetFlags()),
4741                                SDValue(),
4742                                GA->getOffset(),
4743                                IsIndexSignExt);
4744       }
4745
4746     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
4747     // instruction, then it could be just the BASE or everything else we don't
4748     // know how to handle. Just use Ptr as BASE and give up.
4749     if (Ptr->getOpcode() != ISD::ADD)
4750       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4751
4752     // We know that we have at least an ADD instruction. Try to pattern match
4753     // the simple case of BASE + OFFSET.
4754     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
4755       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
4756       return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
4757     }
4758
4759     // Inside a loop the current BASE pointer is calculated using an ADD and a
4760     // MUL instruction. In this case Ptr is the actual BASE pointer.
4761     // (i64 add (i64 %array_ptr)
4762     //          (i64 mul (i64 %induction_var)
4763     //                   (i64 %element_size)))
4764     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
4765       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4766
4767     // Look at Base + Index + Offset cases.
4768     SDValue Base = Ptr->getOperand(0);
4769     SDValue IndexOffset = Ptr->getOperand(1);
4770
4771     // Skip signextends.
4772     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
4773       IndexOffset = IndexOffset->getOperand(0);
4774       IsIndexSignExt = true;
4775     }
4776
4777     // Either the case of Base + Index (no offset) or something else.
4778     if (IndexOffset->getOpcode() != ISD::ADD)
4779       return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
4780
4781     // Now we have the case of Base + Index + offset.
4782     SDValue Index = IndexOffset->getOperand(0);
4783     SDValue Offset = IndexOffset->getOperand(1);
4784
4785     if (!isa<ConstantSDNode>(Offset))
4786       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4787
4788     // Ignore signextends.
4789     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
4790       Index = Index->getOperand(0);
4791       IsIndexSignExt = true;
4792     } else IsIndexSignExt = false;
4793
4794     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
4795     return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
4796   }
4797 };
4798 } // namespace
4799
4800 namespace {
4801 /// Represents known origin of an individual byte in load combine pattern. The
4802 /// value of the byte is either constant zero or comes from memory.
4803 struct ByteProvider {
4804   // For constant zero providers Load is set to nullptr. For memory providers
4805   // Load represents the node which loads the byte from memory.
4806   // ByteOffset is the offset of the byte in the value produced by the load.
4807   LoadSDNode *Load;
4808   unsigned ByteOffset;
4809
4810   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4811
4812   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4813     return ByteProvider(Load, ByteOffset);
4814   }
4815   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4816
4817   bool isConstantZero() const { return !Load; }
4818   bool isMemory() const { return Load; }
4819
4820   bool operator==(const ByteProvider &Other) const {
4821     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4822   }
4823
4824 private:
4825   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4826       : Load(Load), ByteOffset(ByteOffset) {}
4827 };
4828
4829 /// Recursively traverses the expression calculating the origin of the requested
4830 /// byte of the given value. Returns None if the provider can't be calculated.
4831 ///
4832 /// For all the values except the root of the expression verifies that the value
4833 /// has exactly one use and if it's not true return None. This way if the origin
4834 /// of the byte is returned it's guaranteed that the values which contribute to
4835 /// the byte are not used outside of this expression.
4836 ///
4837 /// Because the parts of the expression are not allowed to have more than one
4838 /// use this function iterates over trees, not DAGs. So it never visits the same
4839 /// node more than once.
4840 const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
4841                                                    unsigned Depth,
4842                                                    bool Root = false) {
4843   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4844   if (Depth == 10)
4845     return None;
4846
4847   if (!Root && !Op.hasOneUse())
4848     return None;
4849
4850   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4851   unsigned BitWidth = Op.getValueSizeInBits();
4852   if (BitWidth % 8 != 0)
4853     return None;
4854   unsigned ByteWidth = BitWidth / 8;
4855   assert(Index < ByteWidth && "invalid index requested");
4856   (void) ByteWidth;
4857
4858   switch (Op.getOpcode()) {
4859   case ISD::OR: {
4860     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4861     if (!LHS)
4862       return None;
4863     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4864     if (!RHS)
4865       return None;
4866
4867     if (LHS->isConstantZero())
4868       return RHS;
4869     if (RHS->isConstantZero())
4870       return LHS;
4871     return None;
4872   }
4873   case ISD::SHL: {
4874     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4875     if (!ShiftOp)
4876       return None;
4877
4878     uint64_t BitShift = ShiftOp->getZExtValue();
4879     if (BitShift % 8 != 0)
4880       return None;
4881     uint64_t ByteShift = BitShift / 8;
4882
4883     return Index < ByteShift
4884                ? ByteProvider::getConstantZero()
4885                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4886                                        Depth + 1);
4887   }
4888   case ISD::ANY_EXTEND:
4889   case ISD::SIGN_EXTEND:
4890   case ISD::ZERO_EXTEND: {
4891     SDValue NarrowOp = Op->getOperand(0);
4892     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4893     if (NarrowBitWidth % 8 != 0)
4894       return None;
4895     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4896
4897     if (Index >= NarrowByteWidth)
4898       return Op.getOpcode() == ISD::ZERO_EXTEND
4899                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4900                  : None;
4901     return calculateByteProvider(NarrowOp, Index, Depth + 1);
4902   }
4903   case ISD::BSWAP:
4904     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4905                                  Depth + 1);
4906   case ISD::LOAD: {
4907     auto L = cast<LoadSDNode>(Op.getNode());
4908     if (L->isVolatile() || L->isIndexed())
4909       return None;
4910
4911     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
4912     if (NarrowBitWidth % 8 != 0)
4913       return None;
4914     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4915
4916     if (Index >= NarrowByteWidth)
4917       return L->getExtensionType() == ISD::ZEXTLOAD
4918                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4919                  : None;
4920     return ByteProvider::getMemory(L, Index);
4921   }
4922   }
4923
4924   return None;
4925 }
4926 } // namespace
4927
4928 /// Match a pattern where a wide type scalar value is loaded by several narrow
4929 /// loads and combined by shifts and ors. Fold it into a single load or a load
4930 /// and a BSWAP if the targets supports it.
4931 ///
4932 /// Assuming little endian target:
4933 ///  i8 *a = ...
4934 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4935 /// =>
4936 ///  i32 val = *((i32)a)
4937 ///
4938 ///  i8 *a = ...
4939 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4940 /// =>
4941 ///  i32 val = BSWAP(*((i32)a))
4942 ///
4943 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4944 /// interact well with the worklist mechanism. When a part of the pattern is
4945 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4946 /// but the root node of the pattern which triggers the load combine is not
4947 /// necessarily a direct user of the changed node. For example, once the address
4948 /// of t28 load is reassociated load combine won't be triggered:
4949 ///             t25: i32 = add t4, Constant:i32<2>
4950 ///           t26: i64 = sign_extend t25
4951 ///        t27: i64 = add t2, t26
4952 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4953 ///     t29: i32 = zero_extend t28
4954 ///   t32: i32 = shl t29, Constant:i8<8>
4955 /// t33: i32 = or t23, t32
4956 /// As a possible fix visitLoad can check if the load can be a part of a load
4957 /// combine pattern and add corresponding OR roots to the worklist.
4958 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4959   assert(N->getOpcode() == ISD::OR &&
4960          "Can only match load combining against OR nodes");
4961
4962   // Handles simple types only
4963   EVT VT = N->getValueType(0);
4964   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4965     return SDValue();
4966   unsigned ByteWidth = VT.getSizeInBits() / 8;
4967
4968   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4969   // Before legalize we can introduce too wide illegal loads which will be later
4970   // split into legal sized loads. This enables us to combine i64 load by i8
4971   // patterns to a couple of i32 loads on 32 bit targets.
4972   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4973     return SDValue();
4974
4975   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4976     unsigned BW, unsigned i) { return i; };
4977   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4978     unsigned BW, unsigned i) { return BW - i - 1; };
4979
4980   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4981   auto MemoryByteOffset = [&] (ByteProvider P) {
4982     assert(P.isMemory() && "Must be a memory byte provider");
4983     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4984     assert(LoadBitWidth % 8 == 0 &&
4985            "can only analyze providers for individual bytes not bit");
4986     unsigned LoadByteWidth = LoadBitWidth / 8;
4987     return IsBigEndianTarget
4988             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4989             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4990   };
4991
4992   Optional<BaseIndexOffset> Base;
4993   SDValue Chain;
4994
4995   SmallSet<LoadSDNode *, 8> Loads;
4996   Optional<ByteProvider> FirstByteProvider;
4997   int64_t FirstOffset = INT64_MAX;
4998
4999   // Check if all the bytes of the OR we are looking at are loaded from the same
5000   // base address. Collect bytes offsets from Base address in ByteOffsets.
5001   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5002   for (unsigned i = 0; i < ByteWidth; i++) {
5003     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5004     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5005       return SDValue();
5006
5007     LoadSDNode *L = P->Load;
5008     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5009            "Must be enforced by calculateByteProvider");
5010     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5011
5012     // All loads must share the same chain
5013     SDValue LChain = L->getChain();
5014     if (!Chain)
5015       Chain = LChain;
5016     else if (Chain != LChain)
5017       return SDValue();
5018
5019     // Loads must share the same base address
5020     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
5021     if (!Base)
5022       Base = Ptr;
5023     else if (!Base->equalBaseIndex(Ptr))
5024       return SDValue();
5025
5026     // Calculate the offset of the current byte from the base address
5027     int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
5028     ByteOffsets[i] = ByteOffsetFromBase;
5029
5030     // Remember the first byte load
5031     if (ByteOffsetFromBase < FirstOffset) {
5032       FirstByteProvider = P;
5033       FirstOffset = ByteOffsetFromBase;
5034     }
5035
5036     Loads.insert(L);
5037   }
5038   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
5039          "memory, so there must be at least one load which produces the value");
5040   assert(Base && "Base address of the accessed memory location must be set");
5041   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5042
5043   // Check if the bytes of the OR we are looking at match with either big or
5044   // little endian value load
5045   bool BigEndian = true, LittleEndian = true;
5046   for (unsigned i = 0; i < ByteWidth; i++) {
5047     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5048     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5049     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5050     if (!BigEndian && !LittleEndian)
5051       return SDValue();
5052   }
5053   assert((BigEndian != LittleEndian) && "should be either or");
5054   assert(FirstByteProvider && "must be set");
5055
5056   // Ensure that the first byte is loaded from zero offset of the first load.
5057   // So the combined value can be loaded from the first load address.
5058   if (MemoryByteOffset(*FirstByteProvider) != 0)
5059     return SDValue();
5060   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5061
5062   // The node we are looking at matches with the pattern, check if we can
5063   // replace it with a single load and bswap if needed.
5064
5065   // If the load needs byte swap check if the target supports it
5066   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5067
5068   // Before legalize we can introduce illegal bswaps which will be later
5069   // converted to an explicit bswap sequence. This way we end up with a single
5070   // load and byte shuffling instead of several loads and byte shuffling.
5071   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5072     return SDValue();
5073
5074   // Check that a load of the wide type is both allowed and fast on the target
5075   bool Fast = false;
5076   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5077                                         VT, FirstLoad->getAddressSpace(),
5078                                         FirstLoad->getAlignment(), &Fast);
5079   if (!Allowed || !Fast)
5080     return SDValue();
5081
5082   SDValue NewLoad =
5083       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5084                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5085
5086   // Transfer chain users from old loads to the new load.
5087   for (LoadSDNode *L : Loads)
5088     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5089
5090   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5091 }
5092
5093 SDValue DAGCombiner::visitXOR(SDNode *N) {
5094   SDValue N0 = N->getOperand(0);
5095   SDValue N1 = N->getOperand(1);
5096   EVT VT = N0.getValueType();
5097
5098   // fold vector ops
5099   if (VT.isVector()) {
5100     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5101       return FoldedVOp;
5102
5103     // fold (xor x, 0) -> x, vector edition
5104     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5105       return N1;
5106     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5107       return N0;
5108   }
5109
5110   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5111   if (N0.isUndef() && N1.isUndef())
5112     return DAG.getConstant(0, SDLoc(N), VT);
5113   // fold (xor x, undef) -> undef
5114   if (N0.isUndef())
5115     return N0;
5116   if (N1.isUndef())
5117     return N1;
5118   // fold (xor c1, c2) -> c1^c2
5119   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5120   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5121   if (N0C && N1C)
5122     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5123   // canonicalize constant to RHS
5124   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5125      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5126     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5127   // fold (xor x, 0) -> x
5128   if (isNullConstant(N1))
5129     return N0;
5130
5131   if (SDValue NewSel = foldBinOpIntoSelect(N))
5132     return NewSel;
5133
5134   // reassociate xor
5135   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
5136     return RXOR;
5137
5138   // fold !(x cc y) -> (x !cc y)
5139   SDValue LHS, RHS, CC;
5140   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
5141     bool isInt = LHS.getValueType().isInteger();
5142     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
5143                                                isInt);
5144
5145     if (!LegalOperations ||
5146         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
5147       switch (N0.getOpcode()) {
5148       default:
5149         llvm_unreachable("Unhandled SetCC Equivalent!");
5150       case ISD::SETCC:
5151         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
5152       case ISD::SELECT_CC:
5153         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
5154                                N0.getOperand(3), NotCC);
5155       }
5156     }
5157   }
5158
5159   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
5160   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
5161       N0.getNode()->hasOneUse() &&
5162       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
5163     SDValue V = N0.getOperand(0);
5164     SDLoc DL(N0);
5165     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
5166                     DAG.getConstant(1, DL, V.getValueType()));
5167     AddToWorklist(V.getNode());
5168     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
5169   }
5170
5171   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
5172   if (isOneConstant(N1) && VT == MVT::i1 &&
5173       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5174     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5175     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
5176       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5177       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5178       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5179       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5180       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5181     }
5182   }
5183   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
5184   if (isAllOnesConstant(N1) &&
5185       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5186     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5187     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
5188       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5189       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5190       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5191       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5192       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5193     }
5194   }
5195   // fold (xor (and x, y), y) -> (and (not x), y)
5196   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5197       N0->getOperand(1) == N1) {
5198     SDValue X = N0->getOperand(0);
5199     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
5200     AddToWorklist(NotX.getNode());
5201     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
5202   }
5203   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
5204   if (N1C && N0.getOpcode() == ISD::XOR) {
5205     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
5206       SDLoc DL(N);
5207       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
5208                          DAG.getConstant(N1C->getAPIntValue() ^
5209                                          N00C->getAPIntValue(), DL, VT));
5210     }
5211     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
5212       SDLoc DL(N);
5213       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
5214                          DAG.getConstant(N1C->getAPIntValue() ^
5215                                          N01C->getAPIntValue(), DL, VT));
5216     }
5217   }
5218
5219   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5220   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5221   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5222       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5223       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5224     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5225       if (C->getAPIntValue() == (OpSizeInBits - 1))
5226         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5227   }
5228
5229   // fold (xor x, x) -> 0
5230   if (N0 == N1)
5231     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5232
5233   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5234   // Here is a concrete example of this equivalence:
5235   // i16   x ==  14
5236   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5237   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5238   //
5239   // =>
5240   //
5241   // i16     ~1      == 0b1111111111111110
5242   // i16 rol(~1, 14) == 0b1011111111111111
5243   //
5244   // Some additional tips to help conceptualize this transform:
5245   // - Try to see the operation as placing a single zero in a value of all ones.
5246   // - There exists no value for x which would allow the result to contain zero.
5247   // - Values of x larger than the bitwidth are undefined and do not require a
5248   //   consistent result.
5249   // - Pushing the zero left requires shifting one bits in from the right.
5250   // A rotate left of ~1 is a nice way of achieving the desired result.
5251   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5252       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5253     SDLoc DL(N);
5254     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5255                        N0.getOperand(1));
5256   }
5257
5258   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5259   if (N0.getOpcode() == N1.getOpcode())
5260     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5261       return Tmp;
5262
5263   // Simplify the expression using non-local knowledge.
5264   if (SimplifyDemandedBits(SDValue(N, 0)))
5265     return SDValue(N, 0);
5266
5267   return SDValue();
5268 }
5269
5270 /// Handle transforms common to the three shifts, when the shift amount is a
5271 /// constant.
5272 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5273   SDNode *LHS = N->getOperand(0).getNode();
5274   if (!LHS->hasOneUse()) return SDValue();
5275
5276   // We want to pull some binops through shifts, so that we have (and (shift))
5277   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5278   // thing happens with address calculations, so it's important to canonicalize
5279   // it.
5280   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5281
5282   switch (LHS->getOpcode()) {
5283   default: return SDValue();
5284   case ISD::OR:
5285   case ISD::XOR:
5286     HighBitSet = false; // We can only transform sra if the high bit is clear.
5287     break;
5288   case ISD::AND:
5289     HighBitSet = true;  // We can only transform sra if the high bit is set.
5290     break;
5291   case ISD::ADD:
5292     if (N->getOpcode() != ISD::SHL)
5293       return SDValue(); // only shl(add) not sr[al](add).
5294     HighBitSet = false; // We can only transform sra if the high bit is clear.
5295     break;
5296   }
5297
5298   // We require the RHS of the binop to be a constant and not opaque as well.
5299   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5300   if (!BinOpCst) return SDValue();
5301
5302   // FIXME: disable this unless the input to the binop is a shift by a constant
5303   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5304   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5305   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5306                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5307                  BinOpLHSVal->getOpcode() == ISD::SRL;
5308   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5309                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5310
5311   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5312       !isCopyOrSelect)
5313     return SDValue();
5314
5315   if (isCopyOrSelect && N->hasOneUse())
5316     return SDValue();
5317
5318   EVT VT = N->getValueType(0);
5319
5320   // If this is a signed shift right, and the high bit is modified by the
5321   // logical operation, do not perform the transformation. The highBitSet
5322   // boolean indicates the value of the high bit of the constant which would
5323   // cause it to be modified for this operation.
5324   if (N->getOpcode() == ISD::SRA) {
5325     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5326     if (BinOpRHSSignSet != HighBitSet)
5327       return SDValue();
5328   }
5329
5330   if (!TLI.isDesirableToCommuteWithShift(LHS))
5331     return SDValue();
5332
5333   // Fold the constants, shifting the binop RHS by the shift amount.
5334   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5335                                N->getValueType(0),
5336                                LHS->getOperand(1), N->getOperand(1));
5337   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5338
5339   // Create the new shift.
5340   SDValue NewShift = DAG.getNode(N->getOpcode(),
5341                                  SDLoc(LHS->getOperand(0)),
5342                                  VT, LHS->getOperand(0), N->getOperand(1));
5343
5344   // Create the new binop.
5345   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5346 }
5347
5348 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5349   assert(N->getOpcode() == ISD::TRUNCATE);
5350   assert(N->getOperand(0).getOpcode() == ISD::AND);
5351
5352   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5353   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5354     SDValue N01 = N->getOperand(0).getOperand(1);
5355     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5356       SDLoc DL(N);
5357       EVT TruncVT = N->getValueType(0);
5358       SDValue N00 = N->getOperand(0).getOperand(0);
5359       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5360       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5361       AddToWorklist(Trunc00.getNode());
5362       AddToWorklist(Trunc01.getNode());
5363       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5364     }
5365   }
5366
5367   return SDValue();
5368 }
5369
5370 SDValue DAGCombiner::visitRotate(SDNode *N) {
5371   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5372   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
5373       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
5374     if (SDValue NewOp1 =
5375             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
5376       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
5377                          N->getOperand(0), NewOp1);
5378   }
5379   return SDValue();
5380 }
5381
5382 SDValue DAGCombiner::visitSHL(SDNode *N) {
5383   SDValue N0 = N->getOperand(0);
5384   SDValue N1 = N->getOperand(1);
5385   EVT VT = N0.getValueType();
5386   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5387
5388   // fold vector ops
5389   if (VT.isVector()) {
5390     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5391       return FoldedVOp;
5392
5393     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5394     // If setcc produces all-one true value then:
5395     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5396     if (N1CV && N1CV->isConstant()) {
5397       if (N0.getOpcode() == ISD::AND) {
5398         SDValue N00 = N0->getOperand(0);
5399         SDValue N01 = N0->getOperand(1);
5400         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5401
5402         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5403             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5404                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5405           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5406                                                      N01CV, N1CV))
5407             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5408         }
5409       }
5410     }
5411   }
5412
5413   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5414
5415   // fold (shl c1, c2) -> c1<<c2
5416   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5417   if (N0C && N1C && !N1C->isOpaque())
5418     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5419   // fold (shl 0, x) -> 0
5420   if (isNullConstantOrNullSplatConstant(N0))
5421     return N0;
5422   // fold (shl x, c >= size(x)) -> undef
5423   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5424     return DAG.getUNDEF(VT);
5425   // fold (shl x, 0) -> x
5426   if (N1C && N1C->isNullValue())
5427     return N0;
5428   // fold (shl undef, x) -> 0
5429   if (N0.isUndef())
5430     return DAG.getConstant(0, SDLoc(N), VT);
5431
5432   if (SDValue NewSel = foldBinOpIntoSelect(N))
5433     return NewSel;
5434
5435   // if (shl x, c) is known to be zero, return 0
5436   if (DAG.MaskedValueIsZero(SDValue(N, 0),
5437                             APInt::getAllOnesValue(OpSizeInBits)))
5438     return DAG.getConstant(0, SDLoc(N), VT);
5439   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5440   if (N1.getOpcode() == ISD::TRUNCATE &&
5441       N1.getOperand(0).getOpcode() == ISD::AND) {
5442     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5443       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5444   }
5445
5446   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5447     return SDValue(N, 0);
5448
5449   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5450   if (N1C && N0.getOpcode() == ISD::SHL) {
5451     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5452       SDLoc DL(N);
5453       APInt c1 = N0C1->getAPIntValue();
5454       APInt c2 = N1C->getAPIntValue();
5455       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5456
5457       APInt Sum = c1 + c2;
5458       if (Sum.uge(OpSizeInBits))
5459         return DAG.getConstant(0, DL, VT);
5460
5461       return DAG.getNode(
5462           ISD::SHL, DL, VT, N0.getOperand(0),
5463           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5464     }
5465   }
5466
5467   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5468   // For this to be valid, the second form must not preserve any of the bits
5469   // that are shifted out by the inner shift in the first form.  This means
5470   // the outer shift size must be >= the number of bits added by the ext.
5471   // As a corollary, we don't care what kind of ext it is.
5472   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5473               N0.getOpcode() == ISD::ANY_EXTEND ||
5474               N0.getOpcode() == ISD::SIGN_EXTEND) &&
5475       N0.getOperand(0).getOpcode() == ISD::SHL) {
5476     SDValue N0Op0 = N0.getOperand(0);
5477     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5478       APInt c1 = N0Op0C1->getAPIntValue();
5479       APInt c2 = N1C->getAPIntValue();
5480       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5481
5482       EVT InnerShiftVT = N0Op0.getValueType();
5483       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5484       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5485         SDLoc DL(N0);
5486         APInt Sum = c1 + c2;
5487         if (Sum.uge(OpSizeInBits))
5488           return DAG.getConstant(0, DL, VT);
5489
5490         return DAG.getNode(
5491             ISD::SHL, DL, VT,
5492             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5493             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5494       }
5495     }
5496   }
5497
5498   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5499   // Only fold this if the inner zext has no other uses to avoid increasing
5500   // the total number of instructions.
5501   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5502       N0.getOperand(0).getOpcode() == ISD::SRL) {
5503     SDValue N0Op0 = N0.getOperand(0);
5504     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5505       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5506         uint64_t c1 = N0Op0C1->getZExtValue();
5507         uint64_t c2 = N1C->getZExtValue();
5508         if (c1 == c2) {
5509           SDValue NewOp0 = N0.getOperand(0);
5510           EVT CountVT = NewOp0.getOperand(1).getValueType();
5511           SDLoc DL(N);
5512           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5513                                        NewOp0,
5514                                        DAG.getConstant(c2, DL, CountVT));
5515           AddToWorklist(NewSHL.getNode());
5516           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5517         }
5518       }
5519     }
5520   }
5521
5522   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5523   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5524   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5525       N0->getFlags().hasExact()) {
5526     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5527       uint64_t C1 = N0C1->getZExtValue();
5528       uint64_t C2 = N1C->getZExtValue();
5529       SDLoc DL(N);
5530       if (C1 <= C2)
5531         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5532                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5533       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5534                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5535     }
5536   }
5537
5538   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5539   //                               (and (srl x, (sub c1, c2), MASK)
5540   // Only fold this if the inner shift has no other uses -- if it does, folding
5541   // this will increase the total number of instructions.
5542   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5543     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5544       uint64_t c1 = N0C1->getZExtValue();
5545       if (c1 < OpSizeInBits) {
5546         uint64_t c2 = N1C->getZExtValue();
5547         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5548         SDValue Shift;
5549         if (c2 > c1) {
5550           Mask <<= c2 - c1;
5551           SDLoc DL(N);
5552           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5553                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5554         } else {
5555           Mask.lshrInPlace(c1 - c2);
5556           SDLoc DL(N);
5557           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5558                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5559         }
5560         SDLoc DL(N0);
5561         return DAG.getNode(ISD::AND, DL, VT, Shift,
5562                            DAG.getConstant(Mask, DL, VT));
5563       }
5564     }
5565   }
5566
5567   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5568   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5569       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5570     SDLoc DL(N);
5571     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5572     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5573     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5574   }
5575
5576   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5577   // Variant of version done on multiply, except mul by a power of 2 is turned
5578   // into a shift.
5579   if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
5580       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5581       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5582     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5583     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5584     AddToWorklist(Shl0.getNode());
5585     AddToWorklist(Shl1.getNode());
5586     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
5587   }
5588
5589   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5590   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5591       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5592       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5593     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5594     if (isConstantOrConstantVector(Shl))
5595       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5596   }
5597
5598   if (N1C && !N1C->isOpaque())
5599     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5600       return NewSHL;
5601
5602   return SDValue();
5603 }
5604
5605 SDValue DAGCombiner::visitSRA(SDNode *N) {
5606   SDValue N0 = N->getOperand(0);
5607   SDValue N1 = N->getOperand(1);
5608   EVT VT = N0.getValueType();
5609   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5610
5611   // Arithmetic shifting an all-sign-bit value is a no-op.
5612   // fold (sra 0, x) -> 0
5613   // fold (sra -1, x) -> -1
5614   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5615     return N0;
5616
5617   // fold vector ops
5618   if (VT.isVector())
5619     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5620       return FoldedVOp;
5621
5622   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5623
5624   // fold (sra c1, c2) -> (sra c1, c2)
5625   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5626   if (N0C && N1C && !N1C->isOpaque())
5627     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5628   // fold (sra x, c >= size(x)) -> undef
5629   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5630     return DAG.getUNDEF(VT);
5631   // fold (sra x, 0) -> x
5632   if (N1C && N1C->isNullValue())
5633     return N0;
5634
5635   if (SDValue NewSel = foldBinOpIntoSelect(N))
5636     return NewSel;
5637
5638   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5639   // sext_inreg.
5640   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5641     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5642     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5643     if (VT.isVector())
5644       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5645                                ExtVT, VT.getVectorNumElements());
5646     if ((!LegalOperations ||
5647          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5648       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5649                          N0.getOperand(0), DAG.getValueType(ExtVT));
5650   }
5651
5652   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5653   if (N1C && N0.getOpcode() == ISD::SRA) {
5654     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5655       SDLoc DL(N);
5656       APInt c1 = N0C1->getAPIntValue();
5657       APInt c2 = N1C->getAPIntValue();
5658       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5659
5660       APInt Sum = c1 + c2;
5661       if (Sum.uge(OpSizeInBits))
5662         Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
5663
5664       return DAG.getNode(
5665           ISD::SRA, DL, VT, N0.getOperand(0),
5666           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5667     }
5668   }
5669
5670   // fold (sra (shl X, m), (sub result_size, n))
5671   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5672   // result_size - n != m.
5673   // If truncate is free for the target sext(shl) is likely to result in better
5674   // code.
5675   if (N0.getOpcode() == ISD::SHL && N1C) {
5676     // Get the two constanst of the shifts, CN0 = m, CN = n.
5677     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5678     if (N01C) {
5679       LLVMContext &Ctx = *DAG.getContext();
5680       // Determine what the truncate's result bitsize and type would be.
5681       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5682
5683       if (VT.isVector())
5684         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5685
5686       // Determine the residual right-shift amount.
5687       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5688
5689       // If the shift is not a no-op (in which case this should be just a sign
5690       // extend already), the truncated to type is legal, sign_extend is legal
5691       // on that type, and the truncate to that type is both legal and free,
5692       // perform the transform.
5693       if ((ShiftAmt > 0) &&
5694           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5695           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5696           TLI.isTruncateFree(VT, TruncVT)) {
5697
5698         SDLoc DL(N);
5699         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5700             getShiftAmountTy(N0.getOperand(0).getValueType()));
5701         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5702                                     N0.getOperand(0), Amt);
5703         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5704                                     Shift);
5705         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5706                            N->getValueType(0), Trunc);
5707       }
5708     }
5709   }
5710
5711   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5712   if (N1.getOpcode() == ISD::TRUNCATE &&
5713       N1.getOperand(0).getOpcode() == ISD::AND) {
5714     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5715       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5716   }
5717
5718   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5719   //      if c1 is equal to the number of bits the trunc removes
5720   if (N0.getOpcode() == ISD::TRUNCATE &&
5721       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5722        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5723       N0.getOperand(0).hasOneUse() &&
5724       N0.getOperand(0).getOperand(1).hasOneUse() &&
5725       N1C) {
5726     SDValue N0Op0 = N0.getOperand(0);
5727     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5728       unsigned LargeShiftVal = LargeShift->getZExtValue();
5729       EVT LargeVT = N0Op0.getValueType();
5730
5731       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5732         SDLoc DL(N);
5733         SDValue Amt =
5734           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5735                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5736         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5737                                   N0Op0.getOperand(0), Amt);
5738         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5739       }
5740     }
5741   }
5742
5743   // Simplify, based on bits shifted out of the LHS.
5744   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5745     return SDValue(N, 0);
5746
5747
5748   // If the sign bit is known to be zero, switch this to a SRL.
5749   if (DAG.SignBitIsZero(N0))
5750     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5751
5752   if (N1C && !N1C->isOpaque())
5753     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5754       return NewSRA;
5755
5756   return SDValue();
5757 }
5758
5759 SDValue DAGCombiner::visitSRL(SDNode *N) {
5760   SDValue N0 = N->getOperand(0);
5761   SDValue N1 = N->getOperand(1);
5762   EVT VT = N0.getValueType();
5763   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5764
5765   // fold vector ops
5766   if (VT.isVector())
5767     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5768       return FoldedVOp;
5769
5770   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5771
5772   // fold (srl c1, c2) -> c1 >>u c2
5773   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5774   if (N0C && N1C && !N1C->isOpaque())
5775     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5776   // fold (srl 0, x) -> 0
5777   if (isNullConstantOrNullSplatConstant(N0))
5778     return N0;
5779   // fold (srl x, c >= size(x)) -> undef
5780   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5781     return DAG.getUNDEF(VT);
5782   // fold (srl x, 0) -> x
5783   if (N1C && N1C->isNullValue())
5784     return N0;
5785
5786   if (SDValue NewSel = foldBinOpIntoSelect(N))
5787     return NewSel;
5788
5789   // if (srl x, c) is known to be zero, return 0
5790   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5791                                    APInt::getAllOnesValue(OpSizeInBits)))
5792     return DAG.getConstant(0, SDLoc(N), VT);
5793
5794   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5795   if (N1C && N0.getOpcode() == ISD::SRL) {
5796     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5797       SDLoc DL(N);
5798       APInt c1 = N0C1->getAPIntValue();
5799       APInt c2 = N1C->getAPIntValue();
5800       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5801
5802       APInt Sum = c1 + c2;
5803       if (Sum.uge(OpSizeInBits))
5804         return DAG.getConstant(0, DL, VT);
5805
5806       return DAG.getNode(
5807           ISD::SRL, DL, VT, N0.getOperand(0),
5808           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5809     }
5810   }
5811
5812   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5813   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5814       N0.getOperand(0).getOpcode() == ISD::SRL) {
5815     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
5816       uint64_t c1 = N001C->getZExtValue();
5817       uint64_t c2 = N1C->getZExtValue();
5818       EVT InnerShiftVT = N0.getOperand(0).getValueType();
5819       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
5820       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5821       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5822       if (c1 + OpSizeInBits == InnerShiftSize) {
5823         SDLoc DL(N0);
5824         if (c1 + c2 >= InnerShiftSize)
5825           return DAG.getConstant(0, DL, VT);
5826         return DAG.getNode(ISD::TRUNCATE, DL, VT,
5827                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5828                                        N0.getOperand(0).getOperand(0),
5829                                        DAG.getConstant(c1 + c2, DL,
5830                                                        ShiftCountVT)));
5831       }
5832     }
5833   }
5834
5835   // fold (srl (shl x, c), c) -> (and x, cst2)
5836   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5837       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5838     SDLoc DL(N);
5839     SDValue Mask =
5840         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5841     AddToWorklist(Mask.getNode());
5842     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5843   }
5844
5845   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5846   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5847     // Shifting in all undef bits?
5848     EVT SmallVT = N0.getOperand(0).getValueType();
5849     unsigned BitSize = SmallVT.getScalarSizeInBits();
5850     if (N1C->getZExtValue() >= BitSize)
5851       return DAG.getUNDEF(VT);
5852
5853     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5854       uint64_t ShiftAmt = N1C->getZExtValue();
5855       SDLoc DL0(N0);
5856       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5857                                        N0.getOperand(0),
5858                           DAG.getConstant(ShiftAmt, DL0,
5859                                           getShiftAmountTy(SmallVT)));
5860       AddToWorklist(SmallShift.getNode());
5861       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
5862       SDLoc DL(N);
5863       return DAG.getNode(ISD::AND, DL, VT,
5864                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5865                          DAG.getConstant(Mask, DL, VT));
5866     }
5867   }
5868
5869   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5870   // bit, which is unmodified by sra.
5871   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5872     if (N0.getOpcode() == ISD::SRA)
5873       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5874   }
5875
5876   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5877   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5878       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5879     KnownBits Known;
5880     DAG.computeKnownBits(N0.getOperand(0), Known);
5881
5882     // If any of the input bits are KnownOne, then the input couldn't be all
5883     // zeros, thus the result of the srl will always be zero.
5884     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5885
5886     // If all of the bits input the to ctlz node are known to be zero, then
5887     // the result of the ctlz is "32" and the result of the shift is one.
5888     APInt UnknownBits = ~Known.Zero;
5889     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5890
5891     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5892     if (UnknownBits.isPowerOf2()) {
5893       // Okay, we know that only that the single bit specified by UnknownBits
5894       // could be set on input to the CTLZ node. If this bit is set, the SRL
5895       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5896       // to an SRL/XOR pair, which is likely to simplify more.
5897       unsigned ShAmt = UnknownBits.countTrailingZeros();
5898       SDValue Op = N0.getOperand(0);
5899
5900       if (ShAmt) {
5901         SDLoc DL(N0);
5902         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5903                   DAG.getConstant(ShAmt, DL,
5904                                   getShiftAmountTy(Op.getValueType())));
5905         AddToWorklist(Op.getNode());
5906       }
5907
5908       SDLoc DL(N);
5909       return DAG.getNode(ISD::XOR, DL, VT,
5910                          Op, DAG.getConstant(1, DL, VT));
5911     }
5912   }
5913
5914   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5915   if (N1.getOpcode() == ISD::TRUNCATE &&
5916       N1.getOperand(0).getOpcode() == ISD::AND) {
5917     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5918       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5919   }
5920
5921   // fold operands of srl based on knowledge that the low bits are not
5922   // demanded.
5923   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5924     return SDValue(N, 0);
5925
5926   if (N1C && !N1C->isOpaque())
5927     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5928       return NewSRL;
5929
5930   // Attempt to convert a srl of a load into a narrower zero-extending load.
5931   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5932     return NarrowLoad;
5933
5934   // Here is a common situation. We want to optimize:
5935   //
5936   //   %a = ...
5937   //   %b = and i32 %a, 2
5938   //   %c = srl i32 %b, 1
5939   //   brcond i32 %c ...
5940   //
5941   // into
5942   //
5943   //   %a = ...
5944   //   %b = and %a, 2
5945   //   %c = setcc eq %b, 0
5946   //   brcond %c ...
5947   //
5948   // However when after the source operand of SRL is optimized into AND, the SRL
5949   // itself may not be optimized further. Look for it and add the BRCOND into
5950   // the worklist.
5951   if (N->hasOneUse()) {
5952     SDNode *Use = *N->use_begin();
5953     if (Use->getOpcode() == ISD::BRCOND)
5954       AddToWorklist(Use);
5955     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5956       // Also look pass the truncate.
5957       Use = *Use->use_begin();
5958       if (Use->getOpcode() == ISD::BRCOND)
5959         AddToWorklist(Use);
5960     }
5961   }
5962
5963   return SDValue();
5964 }
5965
5966 SDValue DAGCombiner::visitABS(SDNode *N) {
5967   SDValue N0 = N->getOperand(0);
5968   EVT VT = N->getValueType(0);
5969
5970   // fold (abs c1) -> c2
5971   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5972     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
5973   // fold (abs (abs x)) -> (abs x)
5974   if (N0.getOpcode() == ISD::ABS)
5975     return N0;
5976   // fold (abs x) -> x iff not-negative
5977   if (DAG.SignBitIsZero(N0))
5978     return N0;
5979   return SDValue();
5980 }
5981
5982 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5983   SDValue N0 = N->getOperand(0);
5984   EVT VT = N->getValueType(0);
5985
5986   // fold (bswap c1) -> c2
5987   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5988     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5989   // fold (bswap (bswap x)) -> x
5990   if (N0.getOpcode() == ISD::BSWAP)
5991     return N0->getOperand(0);
5992   return SDValue();
5993 }
5994
5995 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5996   SDValue N0 = N->getOperand(0);
5997   EVT VT = N->getValueType(0);
5998
5999   // fold (bitreverse c1) -> c2
6000   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6001     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6002   // fold (bitreverse (bitreverse x)) -> x
6003   if (N0.getOpcode() == ISD::BITREVERSE)
6004     return N0.getOperand(0);
6005   return SDValue();
6006 }
6007
6008 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6009   SDValue N0 = N->getOperand(0);
6010   EVT VT = N->getValueType(0);
6011
6012   // fold (ctlz c1) -> c2
6013   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6014     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6015   return SDValue();
6016 }
6017
6018 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6019   SDValue N0 = N->getOperand(0);
6020   EVT VT = N->getValueType(0);
6021
6022   // fold (ctlz_zero_undef c1) -> c2
6023   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6024     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6025   return SDValue();
6026 }
6027
6028 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6029   SDValue N0 = N->getOperand(0);
6030   EVT VT = N->getValueType(0);
6031
6032   // fold (cttz c1) -> c2
6033   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6034     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6035   return SDValue();
6036 }
6037
6038 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6039   SDValue N0 = N->getOperand(0);
6040   EVT VT = N->getValueType(0);
6041
6042   // fold (cttz_zero_undef c1) -> c2
6043   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6044     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6045   return SDValue();
6046 }
6047
6048 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6049   SDValue N0 = N->getOperand(0);
6050   EVT VT = N->getValueType(0);
6051
6052   // fold (ctpop c1) -> c2
6053   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6054     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6055   return SDValue();
6056 }
6057
6058
6059 /// \brief Generate Min/Max node
6060 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6061                                    SDValue RHS, SDValue True, SDValue False,
6062                                    ISD::CondCode CC, const TargetLowering &TLI,
6063                                    SelectionDAG &DAG) {
6064   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6065     return SDValue();
6066
6067   switch (CC) {
6068   case ISD::SETOLT:
6069   case ISD::SETOLE:
6070   case ISD::SETLT:
6071   case ISD::SETLE:
6072   case ISD::SETULT:
6073   case ISD::SETULE: {
6074     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6075     if (TLI.isOperationLegal(Opcode, VT))
6076       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6077     return SDValue();
6078   }
6079   case ISD::SETOGT:
6080   case ISD::SETOGE:
6081   case ISD::SETGT:
6082   case ISD::SETGE:
6083   case ISD::SETUGT:
6084   case ISD::SETUGE: {
6085     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6086     if (TLI.isOperationLegal(Opcode, VT))
6087       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6088     return SDValue();
6089   }
6090   default:
6091     return SDValue();
6092   }
6093 }
6094
6095 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
6096   SDValue Cond = N->getOperand(0);
6097   SDValue N1 = N->getOperand(1);
6098   SDValue N2 = N->getOperand(2);
6099   EVT VT = N->getValueType(0);
6100   EVT CondVT = Cond.getValueType();
6101   SDLoc DL(N);
6102
6103   if (!VT.isInteger())
6104     return SDValue();
6105
6106   auto *C1 = dyn_cast<ConstantSDNode>(N1);
6107   auto *C2 = dyn_cast<ConstantSDNode>(N2);
6108   if (!C1 || !C2)
6109     return SDValue();
6110
6111   // Only do this before legalization to avoid conflicting with target-specific
6112   // transforms in the other direction (create a select from a zext/sext). There
6113   // is also a target-independent combine here in DAGCombiner in the other
6114   // direction for (select Cond, -1, 0) when the condition is not i1.
6115   if (CondVT == MVT::i1 && !LegalOperations) {
6116     if (C1->isNullValue() && C2->isOne()) {
6117       // select Cond, 0, 1 --> zext (!Cond)
6118       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6119       if (VT != MVT::i1)
6120         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
6121       return NotCond;
6122     }
6123     if (C1->isNullValue() && C2->isAllOnesValue()) {
6124       // select Cond, 0, -1 --> sext (!Cond)
6125       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6126       if (VT != MVT::i1)
6127         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
6128       return NotCond;
6129     }
6130     if (C1->isOne() && C2->isNullValue()) {
6131       // select Cond, 1, 0 --> zext (Cond)
6132       if (VT != MVT::i1)
6133         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6134       return Cond;
6135     }
6136     if (C1->isAllOnesValue() && C2->isNullValue()) {
6137       // select Cond, -1, 0 --> sext (Cond)
6138       if (VT != MVT::i1)
6139         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6140       return Cond;
6141     }
6142
6143     // For any constants that differ by 1, we can transform the select into an
6144     // extend and add. Use a target hook because some targets may prefer to
6145     // transform in the other direction.
6146     if (TLI.convertSelectOfConstantsToMath()) {
6147       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
6148         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6149         if (VT != MVT::i1)
6150           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6151         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6152       }
6153       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
6154         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6155         if (VT != MVT::i1)
6156           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6157         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6158       }
6159     }
6160
6161     return SDValue();
6162   }
6163
6164   // fold (select Cond, 0, 1) -> (xor Cond, 1)
6165   // We can't do this reliably if integer based booleans have different contents
6166   // to floating point based booleans. This is because we can't tell whether we
6167   // have an integer-based boolean or a floating-point-based boolean unless we
6168   // can find the SETCC that produced it and inspect its operands. This is
6169   // fairly easy if C is the SETCC node, but it can potentially be
6170   // undiscoverable (or not reasonably discoverable). For example, it could be
6171   // in another basic block or it could require searching a complicated
6172   // expression.
6173   if (CondVT.isInteger() &&
6174       TLI.getBooleanContents(false, true) ==
6175           TargetLowering::ZeroOrOneBooleanContent &&
6176       TLI.getBooleanContents(false, false) ==
6177           TargetLowering::ZeroOrOneBooleanContent &&
6178       C1->isNullValue() && C2->isOne()) {
6179     SDValue NotCond =
6180         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
6181     if (VT.bitsEq(CondVT))
6182       return NotCond;
6183     return DAG.getZExtOrTrunc(NotCond, DL, VT);
6184   }
6185
6186   return SDValue();
6187 }
6188
6189 SDValue DAGCombiner::visitSELECT(SDNode *N) {
6190   SDValue N0 = N->getOperand(0);
6191   SDValue N1 = N->getOperand(1);
6192   SDValue N2 = N->getOperand(2);
6193   EVT VT = N->getValueType(0);
6194   EVT VT0 = N0.getValueType();
6195
6196   // fold (select C, X, X) -> X
6197   if (N1 == N2)
6198     return N1;
6199   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
6200     // fold (select true, X, Y) -> X
6201     // fold (select false, X, Y) -> Y
6202     return !N0C->isNullValue() ? N1 : N2;
6203   }
6204   // fold (select X, X, Y) -> (or X, Y)
6205   // fold (select X, 1, Y) -> (or C, Y)
6206   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
6207     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
6208
6209   if (SDValue V = foldSelectOfConstants(N))
6210     return V;
6211
6212   // fold (select C, 0, X) -> (and (not C), X)
6213   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
6214     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6215     AddToWorklist(NOTNode.getNode());
6216     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
6217   }
6218   // fold (select C, X, 1) -> (or (not C), X)
6219   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
6220     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6221     AddToWorklist(NOTNode.getNode());
6222     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
6223   }
6224   // fold (select X, Y, X) -> (and X, Y)
6225   // fold (select X, Y, 0) -> (and X, Y)
6226   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
6227     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
6228
6229   // If we can fold this based on the true/false value, do so.
6230   if (SimplifySelectOps(N, N1, N2))
6231     return SDValue(N, 0);  // Don't revisit N.
6232
6233   if (VT0 == MVT::i1) {
6234     // The code in this block deals with the following 2 equivalences:
6235     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
6236     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
6237     // The target can specify its preferred form with the
6238     // shouldNormalizeToSelectSequence() callback. However we always transform
6239     // to the right anyway if we find the inner select exists in the DAG anyway
6240     // and we always transform to the left side if we know that we can further
6241     // optimize the combination of the conditions.
6242     bool normalizeToSequence
6243       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
6244     // select (and Cond0, Cond1), X, Y
6245     //   -> select Cond0, (select Cond1, X, Y), Y
6246     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
6247       SDValue Cond0 = N0->getOperand(0);
6248       SDValue Cond1 = N0->getOperand(1);
6249       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
6250                                         N1.getValueType(), Cond1, N1, N2);
6251       if (normalizeToSequence || !InnerSelect.use_empty())
6252         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
6253                            InnerSelect, N2);
6254     }
6255     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
6256     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
6257       SDValue Cond0 = N0->getOperand(0);
6258       SDValue Cond1 = N0->getOperand(1);
6259       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
6260                                         N1.getValueType(), Cond1, N1, N2);
6261       if (normalizeToSequence || !InnerSelect.use_empty())
6262         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
6263                            InnerSelect);
6264     }
6265
6266     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
6267     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
6268       SDValue N1_0 = N1->getOperand(0);
6269       SDValue N1_1 = N1->getOperand(1);
6270       SDValue N1_2 = N1->getOperand(2);
6271       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
6272         // Create the actual and node if we can generate good code for it.
6273         if (!normalizeToSequence) {
6274           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
6275                                     N0, N1_0);
6276           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
6277                              N1_1, N2);
6278         }
6279         // Otherwise see if we can optimize the "and" to a better pattern.
6280         if (SDValue Combined = visitANDLike(N0, N1_0, N))
6281           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
6282                              N1_1, N2);
6283       }
6284     }
6285     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
6286     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
6287       SDValue N2_0 = N2->getOperand(0);
6288       SDValue N2_1 = N2->getOperand(1);
6289       SDValue N2_2 = N2->getOperand(2);
6290       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
6291         // Create the actual or node if we can generate good code for it.
6292         if (!normalizeToSequence) {
6293           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
6294                                    N0, N2_0);
6295           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
6296                              N1, N2_2);
6297         }
6298         // Otherwise see if we can optimize to a better pattern.
6299         if (SDValue Combined = visitORLike(N0, N2_0, N))
6300           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
6301                              N1, N2_2);
6302       }
6303     }
6304   }
6305
6306   // select (xor Cond, 1), X, Y -> select Cond, Y, X
6307   if (VT0 == MVT::i1) {
6308     if (N0->getOpcode() == ISD::XOR) {
6309       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
6310         SDValue Cond0 = N0->getOperand(0);
6311         if (C->isOne())
6312           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
6313                              Cond0, N2, N1);
6314       }
6315     }
6316   }
6317
6318   // fold selects based on a setcc into other things, such as min/max/abs
6319   if (N0.getOpcode() == ISD::SETCC) {
6320     // select x, y (fcmp lt x, y) -> fminnum x, y
6321     // select x, y (fcmp gt x, y) -> fmaxnum x, y
6322     //
6323     // This is OK if we don't care about what happens if either operand is a
6324     // NaN.
6325     //
6326
6327     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
6328     // no signed zeros as well as no nans.
6329     const TargetOptions &Options = DAG.getTarget().Options;
6330     if (Options.UnsafeFPMath &&
6331         VT.isFloatingPoint() && N0.hasOneUse() &&
6332         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
6333       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6334
6335       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
6336                                                 N0.getOperand(1), N1, N2, CC,
6337                                                 TLI, DAG))
6338         return FMinMax;
6339     }
6340
6341     if ((!LegalOperations &&
6342          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
6343         TLI.isOperationLegal(ISD::SELECT_CC, VT))
6344       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
6345                          N0.getOperand(0), N0.getOperand(1),
6346                          N1, N2, N0.getOperand(2));
6347     return SimplifySelect(SDLoc(N), N0, N1, N2);
6348   }
6349
6350   return SDValue();
6351 }
6352
6353 static
6354 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6355   SDLoc DL(N);
6356   EVT LoVT, HiVT;
6357   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6358
6359   // Split the inputs.
6360   SDValue Lo, Hi, LL, LH, RL, RH;
6361   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6362   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6363
6364   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6365   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6366
6367   return std::make_pair(Lo, Hi);
6368 }
6369
6370 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6371 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6372 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6373   SDLoc DL(N);
6374   SDValue Cond = N->getOperand(0);
6375   SDValue LHS = N->getOperand(1);
6376   SDValue RHS = N->getOperand(2);
6377   EVT VT = N->getValueType(0);
6378   int NumElems = VT.getVectorNumElements();
6379   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6380          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6381          Cond.getOpcode() == ISD::BUILD_VECTOR);
6382
6383   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6384   // binary ones here.
6385   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6386     return SDValue();
6387
6388   // We're sure we have an even number of elements due to the
6389   // concat_vectors we have as arguments to vselect.
6390   // Skip BV elements until we find one that's not an UNDEF
6391   // After we find an UNDEF element, keep looping until we get to half the
6392   // length of the BV and see if all the non-undef nodes are the same.
6393   ConstantSDNode *BottomHalf = nullptr;
6394   for (int i = 0; i < NumElems / 2; ++i) {
6395     if (Cond->getOperand(i)->isUndef())
6396       continue;
6397
6398     if (BottomHalf == nullptr)
6399       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6400     else if (Cond->getOperand(i).getNode() != BottomHalf)
6401       return SDValue();
6402   }
6403
6404   // Do the same for the second half of the BuildVector
6405   ConstantSDNode *TopHalf = nullptr;
6406   for (int i = NumElems / 2; i < NumElems; ++i) {
6407     if (Cond->getOperand(i)->isUndef())
6408       continue;
6409
6410     if (TopHalf == nullptr)
6411       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6412     else if (Cond->getOperand(i).getNode() != TopHalf)
6413       return SDValue();
6414   }
6415
6416   assert(TopHalf && BottomHalf &&
6417          "One half of the selector was all UNDEFs and the other was all the "
6418          "same value. This should have been addressed before this function.");
6419   return DAG.getNode(
6420       ISD::CONCAT_VECTORS, DL, VT,
6421       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6422       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6423 }
6424
6425 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6426
6427   if (Level >= AfterLegalizeTypes)
6428     return SDValue();
6429
6430   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6431   SDValue Mask = MSC->getMask();
6432   SDValue Data  = MSC->getValue();
6433   SDLoc DL(N);
6434
6435   // If the MSCATTER data type requires splitting and the mask is provided by a
6436   // SETCC, then split both nodes and its operands before legalization. This
6437   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6438   // and enables future optimizations (e.g. min/max pattern matching on X86).
6439   if (Mask.getOpcode() != ISD::SETCC)
6440     return SDValue();
6441
6442   // Check if any splitting is required.
6443   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6444       TargetLowering::TypeSplitVector)
6445     return SDValue();
6446   SDValue MaskLo, MaskHi, Lo, Hi;
6447   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6448
6449   EVT LoVT, HiVT;
6450   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6451
6452   SDValue Chain = MSC->getChain();
6453
6454   EVT MemoryVT = MSC->getMemoryVT();
6455   unsigned Alignment = MSC->getOriginalAlignment();
6456
6457   EVT LoMemVT, HiMemVT;
6458   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6459
6460   SDValue DataLo, DataHi;
6461   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6462
6463   SDValue BasePtr = MSC->getBasePtr();
6464   SDValue IndexLo, IndexHi;
6465   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6466
6467   MachineMemOperand *MMO = DAG.getMachineFunction().
6468     getMachineMemOperand(MSC->getPointerInfo(),
6469                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6470                           Alignment, MSC->getAAInfo(), MSC->getRanges());
6471
6472   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
6473   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6474                             DL, OpsLo, MMO);
6475
6476   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
6477   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6478                             DL, OpsHi, MMO);
6479
6480   AddToWorklist(Lo.getNode());
6481   AddToWorklist(Hi.getNode());
6482
6483   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6484 }
6485
6486 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6487
6488   if (Level >= AfterLegalizeTypes)
6489     return SDValue();
6490
6491   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6492   SDValue Mask = MST->getMask();
6493   SDValue Data  = MST->getValue();
6494   EVT VT = Data.getValueType();
6495   SDLoc DL(N);
6496
6497   // If the MSTORE data type requires splitting and the mask is provided by a
6498   // SETCC, then split both nodes and its operands before legalization. This
6499   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6500   // and enables future optimizations (e.g. min/max pattern matching on X86).
6501   if (Mask.getOpcode() == ISD::SETCC) {
6502
6503     // Check if any splitting is required.
6504     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6505         TargetLowering::TypeSplitVector)
6506       return SDValue();
6507
6508     SDValue MaskLo, MaskHi, Lo, Hi;
6509     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6510
6511     SDValue Chain = MST->getChain();
6512     SDValue Ptr   = MST->getBasePtr();
6513
6514     EVT MemoryVT = MST->getMemoryVT();
6515     unsigned Alignment = MST->getOriginalAlignment();
6516
6517     // if Alignment is equal to the vector size,
6518     // take the half of it for the second part
6519     unsigned SecondHalfAlignment =
6520       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6521
6522     EVT LoMemVT, HiMemVT;
6523     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6524
6525     SDValue DataLo, DataHi;
6526     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6527
6528     MachineMemOperand *MMO = DAG.getMachineFunction().
6529       getMachineMemOperand(MST->getPointerInfo(),
6530                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6531                            Alignment, MST->getAAInfo(), MST->getRanges());
6532
6533     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6534                             MST->isTruncatingStore(),
6535                             MST->isCompressingStore());
6536
6537     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6538                                      MST->isCompressingStore());
6539
6540     MMO = DAG.getMachineFunction().
6541       getMachineMemOperand(MST->getPointerInfo(),
6542                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6543                            SecondHalfAlignment, MST->getAAInfo(),
6544                            MST->getRanges());
6545
6546     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6547                             MST->isTruncatingStore(),
6548                             MST->isCompressingStore());
6549
6550     AddToWorklist(Lo.getNode());
6551     AddToWorklist(Hi.getNode());
6552
6553     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6554   }
6555   return SDValue();
6556 }
6557
6558 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6559
6560   if (Level >= AfterLegalizeTypes)
6561     return SDValue();
6562
6563   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6564   SDValue Mask = MGT->getMask();
6565   SDLoc DL(N);
6566
6567   // If the MGATHER result requires splitting and the mask is provided by a
6568   // SETCC, then split both nodes and its operands before legalization. This
6569   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6570   // and enables future optimizations (e.g. min/max pattern matching on X86).
6571
6572   if (Mask.getOpcode() != ISD::SETCC)
6573     return SDValue();
6574
6575   EVT VT = N->getValueType(0);
6576
6577   // Check if any splitting is required.
6578   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6579       TargetLowering::TypeSplitVector)
6580     return SDValue();
6581
6582   SDValue MaskLo, MaskHi, Lo, Hi;
6583   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6584
6585   SDValue Src0 = MGT->getValue();
6586   SDValue Src0Lo, Src0Hi;
6587   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6588
6589   EVT LoVT, HiVT;
6590   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6591
6592   SDValue Chain = MGT->getChain();
6593   EVT MemoryVT = MGT->getMemoryVT();
6594   unsigned Alignment = MGT->getOriginalAlignment();
6595
6596   EVT LoMemVT, HiMemVT;
6597   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6598
6599   SDValue BasePtr = MGT->getBasePtr();
6600   SDValue Index = MGT->getIndex();
6601   SDValue IndexLo, IndexHi;
6602   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6603
6604   MachineMemOperand *MMO = DAG.getMachineFunction().
6605     getMachineMemOperand(MGT->getPointerInfo(),
6606                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6607                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6608
6609   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6610   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6611                             MMO);
6612
6613   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6614   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6615                             MMO);
6616
6617   AddToWorklist(Lo.getNode());
6618   AddToWorklist(Hi.getNode());
6619
6620   // Build a factor node to remember that this load is independent of the
6621   // other one.
6622   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6623                       Hi.getValue(1));
6624
6625   // Legalized the chain result - switch anything that used the old chain to
6626   // use the new one.
6627   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6628
6629   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6630
6631   SDValue RetOps[] = { GatherRes, Chain };
6632   return DAG.getMergeValues(RetOps, DL);
6633 }
6634
6635 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6636
6637   if (Level >= AfterLegalizeTypes)
6638     return SDValue();
6639
6640   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6641   SDValue Mask = MLD->getMask();
6642   SDLoc DL(N);
6643
6644   // If the MLOAD result requires splitting and the mask is provided by a
6645   // SETCC, then split both nodes and its operands before legalization. This
6646   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6647   // and enables future optimizations (e.g. min/max pattern matching on X86).
6648
6649   if (Mask.getOpcode() == ISD::SETCC) {
6650     EVT VT = N->getValueType(0);
6651
6652     // Check if any splitting is required.
6653     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6654         TargetLowering::TypeSplitVector)
6655       return SDValue();
6656
6657     SDValue MaskLo, MaskHi, Lo, Hi;
6658     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6659
6660     SDValue Src0 = MLD->getSrc0();
6661     SDValue Src0Lo, Src0Hi;
6662     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6663
6664     EVT LoVT, HiVT;
6665     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6666
6667     SDValue Chain = MLD->getChain();
6668     SDValue Ptr   = MLD->getBasePtr();
6669     EVT MemoryVT = MLD->getMemoryVT();
6670     unsigned Alignment = MLD->getOriginalAlignment();
6671
6672     // if Alignment is equal to the vector size,
6673     // take the half of it for the second part
6674     unsigned SecondHalfAlignment =
6675       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6676          Alignment/2 : Alignment;
6677
6678     EVT LoMemVT, HiMemVT;
6679     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6680
6681     MachineMemOperand *MMO = DAG.getMachineFunction().
6682     getMachineMemOperand(MLD->getPointerInfo(),
6683                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6684                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6685
6686     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6687                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6688
6689     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6690                                      MLD->isExpandingLoad());
6691
6692     MMO = DAG.getMachineFunction().
6693     getMachineMemOperand(MLD->getPointerInfo(),
6694                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6695                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6696
6697     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6698                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6699
6700     AddToWorklist(Lo.getNode());
6701     AddToWorklist(Hi.getNode());
6702
6703     // Build a factor node to remember that this load is independent of the
6704     // other one.
6705     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6706                         Hi.getValue(1));
6707
6708     // Legalized the chain result - switch anything that used the old chain to
6709     // use the new one.
6710     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6711
6712     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6713
6714     SDValue RetOps[] = { LoadRes, Chain };
6715     return DAG.getMergeValues(RetOps, DL);
6716   }
6717   return SDValue();
6718 }
6719
6720 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
6721   SDValue N0 = N->getOperand(0);
6722   SDValue N1 = N->getOperand(1);
6723   SDValue N2 = N->getOperand(2);
6724   SDLoc DL(N);
6725
6726   // fold (vselect C, X, X) -> X
6727   if (N1 == N2)
6728     return N1;
6729
6730   // Canonicalize integer abs.
6731   // vselect (setg[te] X,  0),  X, -X ->
6732   // vselect (setgt    X, -1),  X, -X ->
6733   // vselect (setl[te] X,  0), -X,  X ->
6734   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6735   if (N0.getOpcode() == ISD::SETCC) {
6736     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6737     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6738     bool isAbs = false;
6739     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
6740
6741     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
6742          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
6743         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
6744       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
6745     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
6746              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
6747       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6748
6749     if (isAbs) {
6750       EVT VT = LHS.getValueType();
6751       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
6752         return DAG.getNode(ISD::ABS, DL, VT, LHS);
6753
6754       SDValue Shift = DAG.getNode(
6755           ISD::SRA, DL, VT, LHS,
6756           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
6757       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
6758       AddToWorklist(Shift.getNode());
6759       AddToWorklist(Add.getNode());
6760       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
6761     }
6762   }
6763
6764   if (SimplifySelectOps(N, N1, N2))
6765     return SDValue(N, 0);  // Don't revisit N.
6766
6767   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
6768   if (ISD::isBuildVectorAllOnes(N0.getNode()))
6769     return N1;
6770   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
6771   if (ISD::isBuildVectorAllZeros(N0.getNode()))
6772     return N2;
6773
6774   // The ConvertSelectToConcatVector function is assuming both the above
6775   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
6776   // and addressed.
6777   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
6778       N2.getOpcode() == ISD::CONCAT_VECTORS &&
6779       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
6780     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
6781       return CV;
6782   }
6783
6784   return SDValue();
6785 }
6786
6787 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
6788   SDValue N0 = N->getOperand(0);
6789   SDValue N1 = N->getOperand(1);
6790   SDValue N2 = N->getOperand(2);
6791   SDValue N3 = N->getOperand(3);
6792   SDValue N4 = N->getOperand(4);
6793   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
6794
6795   // fold select_cc lhs, rhs, x, x, cc -> x
6796   if (N2 == N3)
6797     return N2;
6798
6799   // Determine if the condition we're dealing with is constant
6800   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
6801                                   CC, SDLoc(N), false)) {
6802     AddToWorklist(SCC.getNode());
6803
6804     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
6805       if (!SCCC->isNullValue())
6806         return N2;    // cond always true -> true val
6807       else
6808         return N3;    // cond always false -> false val
6809     } else if (SCC->isUndef()) {
6810       // When the condition is UNDEF, just return the first operand. This is
6811       // coherent the DAG creation, no setcc node is created in this case
6812       return N2;
6813     } else if (SCC.getOpcode() == ISD::SETCC) {
6814       // Fold to a simpler select_cc
6815       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6816                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6817                          SCC.getOperand(2));
6818     }
6819   }
6820
6821   // If we can fold this based on the true/false value, do so.
6822   if (SimplifySelectOps(N, N2, N3))
6823     return SDValue(N, 0);  // Don't revisit N.
6824
6825   // fold select_cc into other things, such as min/max/abs
6826   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6827 }
6828
6829 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6830   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6831                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6832                        SDLoc(N));
6833 }
6834
6835 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6836   SDValue LHS = N->getOperand(0);
6837   SDValue RHS = N->getOperand(1);
6838   SDValue Carry = N->getOperand(2);
6839   SDValue Cond = N->getOperand(3);
6840
6841   // If Carry is false, fold to a regular SETCC.
6842   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6843     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6844
6845   return SDValue();
6846 }
6847
6848 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
6849   SDValue LHS = N->getOperand(0);
6850   SDValue RHS = N->getOperand(1);
6851   SDValue Carry = N->getOperand(2);
6852   SDValue Cond = N->getOperand(3);
6853
6854   // If Carry is false, fold to a regular SETCC.
6855   if (isNullConstant(Carry))
6856     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6857
6858   return SDValue();
6859 }
6860
6861 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6862 /// a build_vector of constants.
6863 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6864 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6865 /// Vector extends are not folded if operations are legal; this is to
6866 /// avoid introducing illegal build_vector dag nodes.
6867 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6868                                          SelectionDAG &DAG, bool LegalTypes,
6869                                          bool LegalOperations) {
6870   unsigned Opcode = N->getOpcode();
6871   SDValue N0 = N->getOperand(0);
6872   EVT VT = N->getValueType(0);
6873
6874   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6875          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6876          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6877          && "Expected EXTEND dag node in input!");
6878
6879   // fold (sext c1) -> c1
6880   // fold (zext c1) -> c1
6881   // fold (aext c1) -> c1
6882   if (isa<ConstantSDNode>(N0))
6883     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6884
6885   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6886   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6887   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6888   EVT SVT = VT.getScalarType();
6889   if (!(VT.isVector() &&
6890       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6891       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6892     return nullptr;
6893
6894   // We can fold this node into a build_vector.
6895   unsigned VTBits = SVT.getSizeInBits();
6896   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6897   SmallVector<SDValue, 8> Elts;
6898   unsigned NumElts = VT.getVectorNumElements();
6899   SDLoc DL(N);
6900
6901   for (unsigned i=0; i != NumElts; ++i) {
6902     SDValue Op = N0->getOperand(i);
6903     if (Op->isUndef()) {
6904       Elts.push_back(DAG.getUNDEF(SVT));
6905       continue;
6906     }
6907
6908     SDLoc DL(Op);
6909     // Get the constant value and if needed trunc it to the size of the type.
6910     // Nodes like build_vector might have constants wider than the scalar type.
6911     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6912     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6913       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6914     else
6915       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6916   }
6917
6918   return DAG.getBuildVector(VT, DL, Elts).getNode();
6919 }
6920
6921 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6922 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6923 // transformation. Returns true if extension are possible and the above
6924 // mentioned transformation is profitable.
6925 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6926                                     unsigned ExtOpc,
6927                                     SmallVectorImpl<SDNode *> &ExtendNodes,
6928                                     const TargetLowering &TLI) {
6929   bool HasCopyToRegUses = false;
6930   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6931   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6932                             UE = N0.getNode()->use_end();
6933        UI != UE; ++UI) {
6934     SDNode *User = *UI;
6935     if (User == N)
6936       continue;
6937     if (UI.getUse().getResNo() != N0.getResNo())
6938       continue;
6939     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6940     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6941       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6942       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6943         // Sign bits will be lost after a zext.
6944         return false;
6945       bool Add = false;
6946       for (unsigned i = 0; i != 2; ++i) {
6947         SDValue UseOp = User->getOperand(i);
6948         if (UseOp == N0)
6949           continue;
6950         if (!isa<ConstantSDNode>(UseOp))
6951           return false;
6952         Add = true;
6953       }
6954       if (Add)
6955         ExtendNodes.push_back(User);
6956       continue;
6957     }
6958     // If truncates aren't free and there are users we can't
6959     // extend, it isn't worthwhile.
6960     if (!isTruncFree)
6961       return false;
6962     // Remember if this value is live-out.
6963     if (User->getOpcode() == ISD::CopyToReg)
6964       HasCopyToRegUses = true;
6965   }
6966
6967   if (HasCopyToRegUses) {
6968     bool BothLiveOut = false;
6969     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6970          UI != UE; ++UI) {
6971       SDUse &Use = UI.getUse();
6972       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6973         BothLiveOut = true;
6974         break;
6975       }
6976     }
6977     if (BothLiveOut)
6978       // Both unextended and extended values are live out. There had better be
6979       // a good reason for the transformation.
6980       return ExtendNodes.size();
6981   }
6982   return true;
6983 }
6984
6985 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6986                                   SDValue Trunc, SDValue ExtLoad,
6987                                   const SDLoc &DL, ISD::NodeType ExtType) {
6988   // Extend SetCC uses if necessary.
6989   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6990     SDNode *SetCC = SetCCs[i];
6991     SmallVector<SDValue, 4> Ops;
6992
6993     for (unsigned j = 0; j != 2; ++j) {
6994       SDValue SOp = SetCC->getOperand(j);
6995       if (SOp == Trunc)
6996         Ops.push_back(ExtLoad);
6997       else
6998         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6999     }
7000
7001     Ops.push_back(SetCC->getOperand(2));
7002     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7003   }
7004 }
7005
7006 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
7007 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
7008   SDValue N0 = N->getOperand(0);
7009   EVT DstVT = N->getValueType(0);
7010   EVT SrcVT = N0.getValueType();
7011
7012   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
7013           N->getOpcode() == ISD::ZERO_EXTEND) &&
7014          "Unexpected node type (not an extend)!");
7015
7016   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
7017   // For example, on a target with legal v4i32, but illegal v8i32, turn:
7018   //   (v8i32 (sext (v8i16 (load x))))
7019   // into:
7020   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
7021   //                          (v4i32 (sextload (x + 16)))))
7022   // Where uses of the original load, i.e.:
7023   //   (v8i16 (load x))
7024   // are replaced with:
7025   //   (v8i16 (truncate
7026   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
7027   //                            (v4i32 (sextload (x + 16)))))))
7028   //
7029   // This combine is only applicable to illegal, but splittable, vectors.
7030   // All legal types, and illegal non-vector types, are handled elsewhere.
7031   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
7032   //
7033   if (N0->getOpcode() != ISD::LOAD)
7034     return SDValue();
7035
7036   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7037
7038   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
7039       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
7040       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
7041     return SDValue();
7042
7043   SmallVector<SDNode *, 4> SetCCs;
7044   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
7045     return SDValue();
7046
7047   ISD::LoadExtType ExtType =
7048       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7049
7050   // Try to split the vector types to get down to legal types.
7051   EVT SplitSrcVT = SrcVT;
7052   EVT SplitDstVT = DstVT;
7053   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
7054          SplitSrcVT.getVectorNumElements() > 1) {
7055     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
7056     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
7057   }
7058
7059   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
7060     return SDValue();
7061
7062   SDLoc DL(N);
7063   const unsigned NumSplits =
7064       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
7065   const unsigned Stride = SplitSrcVT.getStoreSize();
7066   SmallVector<SDValue, 4> Loads;
7067   SmallVector<SDValue, 4> Chains;
7068
7069   SDValue BasePtr = LN0->getBasePtr();
7070   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
7071     const unsigned Offset = Idx * Stride;
7072     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
7073
7074     SDValue SplitLoad = DAG.getExtLoad(
7075         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
7076         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
7077         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7078
7079     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
7080                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
7081
7082     Loads.push_back(SplitLoad.getValue(0));
7083     Chains.push_back(SplitLoad.getValue(1));
7084   }
7085
7086   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
7087   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
7088
7089   // Simplify TF.
7090   AddToWorklist(NewChain.getNode());
7091
7092   CombineTo(N, NewValue);
7093
7094   // Replace uses of the original load (before extension)
7095   // with a truncate of the concatenated sextloaded vectors.
7096   SDValue Trunc =
7097       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
7098   CombineTo(N0.getNode(), Trunc, NewChain);
7099   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
7100                   (ISD::NodeType)N->getOpcode());
7101   return SDValue(N, 0); // Return N so it doesn't get rechecked!
7102 }
7103
7104 /// If we're narrowing or widening the result of a vector select and the final
7105 /// size is the same size as a setcc (compare) feeding the select, then try to
7106 /// apply the cast operation to the select's operands because matching vector
7107 /// sizes for a select condition and other operands should be more efficient.
7108 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7109   unsigned CastOpcode = Cast->getOpcode();
7110   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7111           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7112           CastOpcode == ISD::FP_ROUND) &&
7113          "Unexpected opcode for vector select narrowing/widening");
7114
7115   // We only do this transform before legal ops because the pattern may be
7116   // obfuscated by target-specific operations after legalization. Do not create
7117   // an illegal select op, however, because that may be difficult to lower.
7118   EVT VT = Cast->getValueType(0);
7119   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7120     return SDValue();
7121
7122   SDValue VSel = Cast->getOperand(0);
7123   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7124       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7125     return SDValue();
7126
7127   // Does the setcc have the same vector size as the casted select?
7128   SDValue SetCC = VSel.getOperand(0);
7129   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7130   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7131     return SDValue();
7132
7133   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7134   SDValue A = VSel.getOperand(1);
7135   SDValue B = VSel.getOperand(2);
7136   SDValue CastA, CastB;
7137   SDLoc DL(Cast);
7138   if (CastOpcode == ISD::FP_ROUND) {
7139     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7140     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7141     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7142   } else {
7143     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7144     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7145   }
7146   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7147 }
7148
7149 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7150   SDValue N0 = N->getOperand(0);
7151   EVT VT = N->getValueType(0);
7152   SDLoc DL(N);
7153
7154   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7155                                               LegalOperations))
7156     return SDValue(Res, 0);
7157
7158   // fold (sext (sext x)) -> (sext x)
7159   // fold (sext (aext x)) -> (sext x)
7160   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7161     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7162
7163   if (N0.getOpcode() == ISD::TRUNCATE) {
7164     // fold (sext (truncate (load x))) -> (sext (smaller load x))
7165     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7166     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7167       SDNode *oye = N0.getOperand(0).getNode();
7168       if (NarrowLoad.getNode() != N0.getNode()) {
7169         CombineTo(N0.getNode(), NarrowLoad);
7170         // CombineTo deleted the truncate, if needed, but not what's under it.
7171         AddToWorklist(oye);
7172       }
7173       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7174     }
7175
7176     // See if the value being truncated is already sign extended.  If so, just
7177     // eliminate the trunc/sext pair.
7178     SDValue Op = N0.getOperand(0);
7179     unsigned OpBits   = Op.getScalarValueSizeInBits();
7180     unsigned MidBits  = N0.getScalarValueSizeInBits();
7181     unsigned DestBits = VT.getScalarSizeInBits();
7182     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7183
7184     if (OpBits == DestBits) {
7185       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
7186       // bits, it is already ready.
7187       if (NumSignBits > DestBits-MidBits)
7188         return Op;
7189     } else if (OpBits < DestBits) {
7190       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
7191       // bits, just sext from i32.
7192       if (NumSignBits > OpBits-MidBits)
7193         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7194     } else {
7195       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
7196       // bits, just truncate to i32.
7197       if (NumSignBits > OpBits-MidBits)
7198         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7199     }
7200
7201     // fold (sext (truncate x)) -> (sextinreg x).
7202     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7203                                                  N0.getValueType())) {
7204       if (OpBits < DestBits)
7205         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7206       else if (OpBits > DestBits)
7207         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7208       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7209                          DAG.getValueType(N0.getValueType()));
7210     }
7211   }
7212
7213   // fold (sext (load x)) -> (sext (truncate (sextload x)))
7214   // Only generate vector extloads when 1) they're legal, and 2) they are
7215   // deemed desirable by the target.
7216   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7217       ((!LegalOperations && !VT.isVector() &&
7218         !cast<LoadSDNode>(N0)->isVolatile()) ||
7219        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
7220     bool DoXform = true;
7221     SmallVector<SDNode*, 4> SetCCs;
7222     if (!N0.hasOneUse())
7223       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
7224     if (VT.isVector())
7225       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7226     if (DoXform) {
7227       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7228       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7229                                        LN0->getBasePtr(), N0.getValueType(),
7230                                        LN0->getMemOperand());
7231       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7232                                   N0.getValueType(), ExtLoad);
7233       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7234       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7235       return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
7236     }
7237   }
7238
7239   // fold (sext (load x)) to multiple smaller sextloads.
7240   // Only on illegal but splittable vectors.
7241   if (SDValue ExtLoad = CombineExtLoad(N))
7242     return ExtLoad;
7243
7244   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7245   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7246   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7247       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7248     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7249     EVT MemVT = LN0->getMemoryVT();
7250     if ((!LegalOperations && !LN0->isVolatile()) ||
7251         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
7252       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7253                                        LN0->getBasePtr(), MemVT,
7254                                        LN0->getMemOperand());
7255       CombineTo(N, ExtLoad);
7256       CombineTo(N0.getNode(),
7257                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7258                             N0.getValueType(), ExtLoad),
7259                 ExtLoad.getValue(1));
7260       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7261     }
7262   }
7263
7264   // fold (sext (and/or/xor (load x), cst)) ->
7265   //      (and/or/xor (sextload x), (sext cst))
7266   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7267        N0.getOpcode() == ISD::XOR) &&
7268       isa<LoadSDNode>(N0.getOperand(0)) &&
7269       N0.getOperand(1).getOpcode() == ISD::Constant &&
7270       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
7271       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7272     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7273     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7274       bool DoXform = true;
7275       SmallVector<SDNode*, 4> SetCCs;
7276       if (!N0.hasOneUse())
7277         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7278                                           SetCCs, TLI);
7279       if (DoXform) {
7280         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7281                                          LN0->getChain(), LN0->getBasePtr(),
7282                                          LN0->getMemoryVT(),
7283                                          LN0->getMemOperand());
7284         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7285         Mask = Mask.sext(VT.getSizeInBits());
7286         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7287                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7288         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7289                                     SDLoc(N0.getOperand(0)),
7290                                     N0.getOperand(0).getValueType(), ExtLoad);
7291         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7292         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7293         return CombineTo(N, And); // Return N so it doesn't get rechecked!
7294       }
7295     }
7296   }
7297
7298   if (N0.getOpcode() == ISD::SETCC) {
7299     SDValue N00 = N0.getOperand(0);
7300     SDValue N01 = N0.getOperand(1);
7301     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7302     EVT N00VT = N0.getOperand(0).getValueType();
7303
7304     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7305     // Only do this before legalize for now.
7306     if (VT.isVector() && !LegalOperations &&
7307         TLI.getBooleanContents(N00VT) ==
7308             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7309       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7310       // of the same size as the compared operands. Only optimize sext(setcc())
7311       // if this is the case.
7312       EVT SVT = getSetCCResultType(N00VT);
7313
7314       // We know that the # elements of the results is the same as the
7315       // # elements of the compare (and the # elements of the compare result
7316       // for that matter).  Check to see that they are the same size.  If so,
7317       // we know that the element size of the sext'd result matches the
7318       // element size of the compare operands.
7319       if (VT.getSizeInBits() == SVT.getSizeInBits())
7320         return DAG.getSetCC(DL, VT, N00, N01, CC);
7321
7322       // If the desired elements are smaller or larger than the source
7323       // elements, we can use a matching integer vector type and then
7324       // truncate/sign extend.
7325       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7326       if (SVT == MatchingVecType) {
7327         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7328         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7329       }
7330     }
7331
7332     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7333     // Here, T can be 1 or -1, depending on the type of the setcc and
7334     // getBooleanContents().
7335     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7336
7337     // To determine the "true" side of the select, we need to know the high bit
7338     // of the value returned by the setcc if it evaluates to true.
7339     // If the type of the setcc is i1, then the true case of the select is just
7340     // sext(i1 1), that is, -1.
7341     // If the type of the setcc is larger (say, i8) then the value of the high
7342     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7343     // of the appropriate width.
7344     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7345                                            : TLI.getConstTrueVal(DAG, VT, DL);
7346     SDValue Zero = DAG.getConstant(0, DL, VT);
7347     if (SDValue SCC =
7348             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7349       return SCC;
7350
7351     if (!VT.isVector()) {
7352       EVT SetCCVT = getSetCCResultType(N00VT);
7353       // Don't do this transform for i1 because there's a select transform
7354       // that would reverse it.
7355       // TODO: We should not do this transform at all without a target hook
7356       // because a sext is likely cheaper than a select?
7357       if (SetCCVT.getScalarSizeInBits() != 1 &&
7358           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7359         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7360         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7361       }
7362     }
7363   }
7364
7365   // fold (sext x) -> (zext x) if the sign bit is known zero.
7366   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7367       DAG.SignBitIsZero(N0))
7368     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7369
7370   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7371     return NewVSel;
7372
7373   return SDValue();
7374 }
7375
7376 // isTruncateOf - If N is a truncate of some other value, return true, record
7377 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7378 // This function computes KnownBits to avoid a duplicated call to
7379 // computeKnownBits in the caller.
7380 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7381                          KnownBits &Known) {
7382   if (N->getOpcode() == ISD::TRUNCATE) {
7383     Op = N->getOperand(0);
7384     DAG.computeKnownBits(Op, Known);
7385     return true;
7386   }
7387
7388   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7389       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7390     return false;
7391
7392   SDValue Op0 = N->getOperand(0);
7393   SDValue Op1 = N->getOperand(1);
7394   assert(Op0.getValueType() == Op1.getValueType());
7395
7396   if (isNullConstant(Op0))
7397     Op = Op1;
7398   else if (isNullConstant(Op1))
7399     Op = Op0;
7400   else
7401     return false;
7402
7403   DAG.computeKnownBits(Op, Known);
7404
7405   if (!(Known.Zero | 1).isAllOnesValue())
7406     return false;
7407
7408   return true;
7409 }
7410
7411 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7412   SDValue N0 = N->getOperand(0);
7413   EVT VT = N->getValueType(0);
7414
7415   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7416                                               LegalOperations))
7417     return SDValue(Res, 0);
7418
7419   // fold (zext (zext x)) -> (zext x)
7420   // fold (zext (aext x)) -> (zext x)
7421   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7422     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7423                        N0.getOperand(0));
7424
7425   // fold (zext (truncate x)) -> (zext x) or
7426   //      (zext (truncate x)) -> (truncate x)
7427   // This is valid when the truncated bits of x are already zero.
7428   // FIXME: We should extend this to work for vectors too.
7429   SDValue Op;
7430   KnownBits Known;
7431   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
7432     APInt TruncatedBits =
7433       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7434       APInt(Op.getValueSizeInBits(), 0) :
7435       APInt::getBitsSet(Op.getValueSizeInBits(),
7436                         N0.getValueSizeInBits(),
7437                         std::min(Op.getValueSizeInBits(),
7438                                  VT.getSizeInBits()));
7439     if (TruncatedBits.isSubsetOf(Known.Zero))
7440       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7441   }
7442
7443   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7444   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7445   if (N0.getOpcode() == ISD::TRUNCATE) {
7446     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7447       SDNode *oye = N0.getOperand(0).getNode();
7448       if (NarrowLoad.getNode() != N0.getNode()) {
7449         CombineTo(N0.getNode(), NarrowLoad);
7450         // CombineTo deleted the truncate, if needed, but not what's under it.
7451         AddToWorklist(oye);
7452       }
7453       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7454     }
7455   }
7456
7457   // fold (zext (truncate x)) -> (and x, mask)
7458   if (N0.getOpcode() == ISD::TRUNCATE) {
7459     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7460     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7461     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7462       SDNode *oye = N0.getOperand(0).getNode();
7463       if (NarrowLoad.getNode() != N0.getNode()) {
7464         CombineTo(N0.getNode(), NarrowLoad);
7465         // CombineTo deleted the truncate, if needed, but not what's under it.
7466         AddToWorklist(oye);
7467       }
7468       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7469     }
7470
7471     EVT SrcVT = N0.getOperand(0).getValueType();
7472     EVT MinVT = N0.getValueType();
7473
7474     // Try to mask before the extension to avoid having to generate a larger mask,
7475     // possibly over several sub-vectors.
7476     if (SrcVT.bitsLT(VT)) {
7477       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7478                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7479         SDValue Op = N0.getOperand(0);
7480         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7481         AddToWorklist(Op.getNode());
7482         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7483       }
7484     }
7485
7486     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7487       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7488       AddToWorklist(Op.getNode());
7489       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7490     }
7491   }
7492
7493   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7494   // if either of the casts is not free.
7495   if (N0.getOpcode() == ISD::AND &&
7496       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7497       N0.getOperand(1).getOpcode() == ISD::Constant &&
7498       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7499                            N0.getValueType()) ||
7500        !TLI.isZExtFree(N0.getValueType(), VT))) {
7501     SDValue X = N0.getOperand(0).getOperand(0);
7502     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7503     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7504     Mask = Mask.zext(VT.getSizeInBits());
7505     SDLoc DL(N);
7506     return DAG.getNode(ISD::AND, DL, VT,
7507                        X, DAG.getConstant(Mask, DL, VT));
7508   }
7509
7510   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7511   // Only generate vector extloads when 1) they're legal, and 2) they are
7512   // deemed desirable by the target.
7513   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7514       ((!LegalOperations && !VT.isVector() &&
7515         !cast<LoadSDNode>(N0)->isVolatile()) ||
7516        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7517     bool DoXform = true;
7518     SmallVector<SDNode*, 4> SetCCs;
7519     if (!N0.hasOneUse())
7520       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7521     if (VT.isVector())
7522       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7523     if (DoXform) {
7524       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7525       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7526                                        LN0->getChain(),
7527                                        LN0->getBasePtr(), N0.getValueType(),
7528                                        LN0->getMemOperand());
7529
7530       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7531                                   N0.getValueType(), ExtLoad);
7532       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7533       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7534       return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
7535     }
7536   }
7537
7538   // fold (zext (load x)) to multiple smaller zextloads.
7539   // Only on illegal but splittable vectors.
7540   if (SDValue ExtLoad = CombineExtLoad(N))
7541     return ExtLoad;
7542
7543   // fold (zext (and/or/xor (load x), cst)) ->
7544   //      (and/or/xor (zextload x), (zext cst))
7545   // Unless (and (load x) cst) will match as a zextload already and has
7546   // additional users.
7547   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7548        N0.getOpcode() == ISD::XOR) &&
7549       isa<LoadSDNode>(N0.getOperand(0)) &&
7550       N0.getOperand(1).getOpcode() == ISD::Constant &&
7551       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7552       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7553     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7554     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7555       bool DoXform = true;
7556       SmallVector<SDNode*, 4> SetCCs;
7557       if (!N0.hasOneUse()) {
7558         if (N0.getOpcode() == ISD::AND) {
7559           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7560           auto NarrowLoad = false;
7561           EVT LoadResultTy = AndC->getValueType(0);
7562           EVT ExtVT, LoadedVT;
7563           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7564                                NarrowLoad))
7565             DoXform = false;
7566         }
7567         if (DoXform)
7568           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7569                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7570       }
7571       if (DoXform) {
7572         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7573                                          LN0->getChain(), LN0->getBasePtr(),
7574                                          LN0->getMemoryVT(),
7575                                          LN0->getMemOperand());
7576         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7577         Mask = Mask.zext(VT.getSizeInBits());
7578         SDLoc DL(N);
7579         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7580                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7581         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7582                                     SDLoc(N0.getOperand(0)),
7583                                     N0.getOperand(0).getValueType(), ExtLoad);
7584         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
7585         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7586         return CombineTo(N, And); // Return N so it doesn't get rechecked!
7587       }
7588     }
7589   }
7590
7591   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7592   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7593   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7594       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7595     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7596     EVT MemVT = LN0->getMemoryVT();
7597     if ((!LegalOperations && !LN0->isVolatile()) ||
7598         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7599       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7600                                        LN0->getChain(),
7601                                        LN0->getBasePtr(), MemVT,
7602                                        LN0->getMemOperand());
7603       CombineTo(N, ExtLoad);
7604       CombineTo(N0.getNode(),
7605                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7606                             ExtLoad),
7607                 ExtLoad.getValue(1));
7608       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7609     }
7610   }
7611
7612   if (N0.getOpcode() == ISD::SETCC) {
7613     // Only do this before legalize for now.
7614     if (!LegalOperations && VT.isVector() &&
7615         N0.getValueType().getVectorElementType() == MVT::i1) {
7616       EVT N00VT = N0.getOperand(0).getValueType();
7617       if (getSetCCResultType(N00VT) == N0.getValueType())
7618         return SDValue();
7619
7620       // We know that the # elements of the results is the same as the #
7621       // elements of the compare (and the # elements of the compare result for
7622       // that matter). Check to see that they are the same size. If so, we know
7623       // that the element size of the sext'd result matches the element size of
7624       // the compare operands.
7625       SDLoc DL(N);
7626       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7627       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7628         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7629         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7630                                      N0.getOperand(1), N0.getOperand(2));
7631         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7632       }
7633
7634       // If the desired elements are smaller or larger than the source
7635       // elements we can use a matching integer vector type and then
7636       // truncate/sign extend.
7637       EVT MatchingElementType = EVT::getIntegerVT(
7638           *DAG.getContext(), N00VT.getScalarSizeInBits());
7639       EVT MatchingVectorType = EVT::getVectorVT(
7640           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7641       SDValue VsetCC =
7642           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7643                       N0.getOperand(1), N0.getOperand(2));
7644       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7645                          VecOnes);
7646     }
7647
7648     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7649     SDLoc DL(N);
7650     if (SDValue SCC = SimplifySelectCC(
7651             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7652             DAG.getConstant(0, DL, VT),
7653             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7654       return SCC;
7655   }
7656
7657   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7658   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7659       isa<ConstantSDNode>(N0.getOperand(1)) &&
7660       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7661       N0.hasOneUse()) {
7662     SDValue ShAmt = N0.getOperand(1);
7663     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7664     if (N0.getOpcode() == ISD::SHL) {
7665       SDValue InnerZExt = N0.getOperand(0);
7666       // If the original shl may be shifting out bits, do not perform this
7667       // transformation.
7668       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7669         InnerZExt.getOperand(0).getValueSizeInBits();
7670       if (ShAmtVal > KnownZeroBits)
7671         return SDValue();
7672     }
7673
7674     SDLoc DL(N);
7675
7676     // Ensure that the shift amount is wide enough for the shifted value.
7677     if (VT.getSizeInBits() >= 256)
7678       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7679
7680     return DAG.getNode(N0.getOpcode(), DL, VT,
7681                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7682                        ShAmt);
7683   }
7684
7685   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7686     return NewVSel;
7687
7688   return SDValue();
7689 }
7690
7691 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       // Try the known folds of an ANY_EXTEND node one after another. Returns the
       // replacement value, the node itself (after the DAG has been updated via
       // CombineTo), or a null SDValue when none of the folds applied.
7692   SDValue N0 = N->getOperand(0);
7693   EVT VT = N->getValueType(0);
7694
7695   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7696                                               LegalOperations))
7697     return SDValue(Res, 0);
7698
7699   // fold (aext (aext x)) -> (aext x)
7700   // fold (aext (zext x)) -> (zext x)
7701   // fold (aext (sext x)) -> (sext x)
       // The inner extension's opcode is reused, so its kind is preserved.
7702   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
7703       N0.getOpcode() == ISD::ZERO_EXTEND ||
7704       N0.getOpcode() == ISD::SIGN_EXTEND)
7705     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7706
7707   // fold (aext (truncate (load x))) -> (aext (smaller load x))
7708   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
7709   if (N0.getOpcode() == ISD::TRUNCATE) {
7710     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Remember the truncate's operand before CombineTo may delete the
           // truncate, so that it can be revisited afterwards.
7711       SDNode *oye = N0.getOperand(0).getNode();
7712       if (NarrowLoad.getNode() != N0.getNode()) {
7713         CombineTo(N0.getNode(), NarrowLoad);
7714         // CombineTo deleted the truncate, if needed, but not what's under it.
7715         AddToWorklist(oye);
7716       }
7717       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7718     }
7719   }
7720
7721   // fold (aext (truncate x))
       // The result is x any-extended or truncated to VT, whichever applies.
7722   if (N0.getOpcode() == ISD::TRUNCATE)
7723     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7724
7725   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
7726   // if the trunc is not free.
7727   if (N0.getOpcode() == ISD::AND &&
7728       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7729       N0.getOperand(1).getOpcode() == ISD::Constant &&
7730       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7731                           N0.getValueType())) {
7732     SDLoc DL(N);
7733     SDValue X = N0.getOperand(0).getOperand(0);
7734     X = DAG.getAnyExtOrTrunc(X, DL, VT);
         // The mask constant is zero-extended to the width of VT.
7735     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7736     Mask = Mask.zext(VT.getSizeInBits());
7737     return DAG.getNode(ISD::AND, DL, VT,
7738                        X, DAG.getConstant(Mask, DL, VT));
7739   }
7740
7741   // fold (aext (load x)) -> (aext (truncate (extload x)))
7742   // None of the supported targets knows how to perform load and any_ext
7743   // on vectors in one instruction.  We only perform this transformation on
7744   // scalars.
7745   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
7746       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7747       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
7748     bool DoXform = true;
7749     SmallVector<SDNode*, 4> SetCCs;
         // If the load has other users, ExtendUsesToFormExtLoad decides whether
         // the transform should still be done and collects SetCC users in SetCCs.
7750     if (!N0.hasOneUse())
7751       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
7752     if (DoXform) {
7753       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7754       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
7755                                        LN0->getChain(),
7756                                        LN0->getBasePtr(), N0.getValueType(),
7757                                        LN0->getMemOperand());
7758       CombineTo(N, ExtLoad);
           // Users of the original load get a truncate of the extending load;
           // users of its chain get the new load's chain.
7759       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7760                                   N0.getValueType(), ExtLoad);
7761       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7762       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7763                       ISD::ANY_EXTEND);
7764       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7765     }
7766   }
7767
7768   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
7769   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
7770   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
7771   if (N0.getOpcode() == ISD::LOAD &&
7772       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7773       N0.hasOneUse()) {
7774     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
         // The new, wider load keeps the extension kind and memory type of the
         // original one.
7775     ISD::LoadExtType ExtType = LN0->getExtensionType();
7776     EVT MemVT = LN0->getMemoryVT();
7777     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
7778       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
7779                                        VT, LN0->getChain(), LN0->getBasePtr(),
7780                                        MemVT, LN0->getMemOperand());
7781       CombineTo(N, ExtLoad);
7782       CombineTo(N0.getNode(),
7783                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7784                             N0.getValueType(), ExtLoad),
7785                 ExtLoad.getValue(1));
7786       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7787     }
7788   }
7789
7790   if (N0.getOpcode() == ISD::SETCC) {
7791     // For vectors:
7792     // aext(setcc) -> vsetcc
7793     // aext(setcc) -> truncate(vsetcc)
7794     // aext(setcc) -> aext(vsetcc)
7795     // Only do this before legalize for now.
7796     if (VT.isVector() && !LegalOperations) {
7797       EVT N0VT = N0.getOperand(0).getValueType();
7798       // We know that the # elements of the results is the same as the
7799       // # elements of the compare (and the # elements of the compare result
7800       // for that matter).  Check to see that they are the same size.  If so,
7801       // we know that the element size of the aext'd result matches the
7802       // element size of the compare operands.
7803       if (VT.getSizeInBits() == N0VT.getSizeInBits())
7804         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
7805                              N0.getOperand(1),
7806                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
7807       // If the desired elements are smaller or larger than the source
7808       // elements we can use a matching integer vector type and then
7809       // truncate/any extend
7810       else {
7811         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
7812         SDValue VsetCC =
7813           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
7814                         N0.getOperand(1),
7815                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
7816         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
7817       }
7818     }
7819
7820     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7821     SDLoc DL(N);
7822     if (SDValue SCC = SimplifySelectCC(
7823             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7824             DAG.getConstant(0, DL, VT),
7825             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7826       return SCC;
7827   }
7828
7829   return SDValue();
7830 }
7831
7832 /// Try to simplify the AssertZext node \p N; a null SDValue means no change.
7833 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7834   SDValue Inner = N->getOperand(0);
7835   const EVT AssertedVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7836
7837   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7838   // The outer assertion adds nothing when the operand asserts the same type.
7839   if (Inner.getOpcode() != ISD::AssertZext)
7840     return SDValue();
7841   const EVT InnerVT = cast<VTSDNode>(Inner.getOperand(1))->getVT();
7842   return AssertedVT == InnerVT ? Inner : SDValue();
7843 }
7844
7845 /// See if the specified operand can be simplified with the knowledge that only
7846 /// the bits specified by Mask are used.  If so, return the simpler operand,
7847 /// otherwise return a null SDValue.
7848 ///
7849 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
7850 /// simplify nodes with multiple uses more aggressively.)
7851 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
7852   switch (V.getOpcode()) {
7853   default: break;
7854   case ISD::Constant: {
7855     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
7856     assert(CV && "Const value should be ConstSDNode.");
7857     const APInt &CVal = CV->getAPIntValue();
7858     APInt NewVal = CVal & Mask;
7859     if (NewVal != CVal)
7860       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
7861     break;
7862   }
7863   case ISD::OR:
7864   case ISD::XOR:
7865     // If the LHS or RHS don't contribute bits to the or, drop them.
7866     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
7867       return V.getOperand(1);
7868     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
7869       return V.getOperand(0);
7870     break;
7871   case ISD::SRL:
7872     // Only look at single-use SRLs.
7873     if (!V.getNode()->hasOneUse())
7874       break;
7875     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
7876       // See if we can recursively simplify the LHS.
7877       unsigned Amt = RHSC->getZExtValue();
7878
7879       // Watch out for shift count overflow though.
7880       if (Amt >= Mask.getBitWidth()) break;
7881       APInt NewMask = Mask << Amt;
7882       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
7883         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
7884                            SimplifyLHS, V.getOperand(1));
7885     }
7886     break;
7887   case ISD::AND: {
7888     // X & -1 -> X (ignoring bits which aren't demanded).
7889     ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
7890     if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
7891       return V.getOperand(0);
7892     break;
7893   }
7894   }
7895   return SDValue();
7896 }
7897
7898 /// If the result of a wider load is shifted right by N bits and then
7899 /// truncated to a narrower type, where N is a multiple of the number of bits
7900 /// of the narrower type, transform it to a narrower load from address + N /
7901 /// num of bits of new type. If the result is to be extended, also fold the
7902 /// extension to form an extending load.
     ///
     /// \p N is the node that narrows the loaded value. SIGN_EXTEND_INREG and
     /// SRL are special-cased below; any other opcode is treated as a plain
     /// narrowing to N's result type with a non-extending load.
     /// \returns the narrowed load (shifted left again if a SHL was folded
     /// through), or a null SDValue if the transform does not apply.
7903 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7904   unsigned Opc = N->getOpcode();
7905
7906   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7907   SDValue N0 = N->getOperand(0);
7908   EVT VT = N->getValueType(0);
       // ExtVT is the type actually read from memory; it defaults to the result
       // type and is narrowed by the special cases below.
7909   EVT ExtVT = VT;
7910
7911   // This transformation isn't valid for vector loads.
7912   if (VT.isVector())
7913     return SDValue();
7914
7915   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
7916   // extended to VT.
7917   if (Opc == ISD::SIGN_EXTEND_INREG) {
7918     ExtType = ISD::SEXTLOAD;
7919     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7920   } else if (Opc == ISD::SRL) {
7921     // Another special-case: SRL is basically zero-extending a narrower value.
7922     ExtType = ISD::ZEXTLOAD;
         // Treat the SRL itself as N0 so that the shift handling below sees it.
7923     N0 = SDValue(N, 0);
7924     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7925     if (!N01) return SDValue();
         // Only the bits that survive the shift need to be loaded.
7926     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
7927                               VT.getSizeInBits() - N01->getZExtValue());
7928   }
7929   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
7930     return SDValue();
7931
7932   unsigned EVTBits = ExtVT.getSizeInBits();
7933
7934   // Do not generate loads of non-round integer types since these can
7935   // be expensive (and would be wrong if the type is not byte sized).
7936   if (!ExtVT.isRound())
7937     return SDValue();
7938
       // ShAmt is the right-shift amount in bits folded into the load address.
       // NOTE(review): the "(x & (EVTBits-1)) == 0" multiple-of tests below are
       // only exact if EVTBits is a power of two - presumably guaranteed by the
       // isRound() check above; confirm.
7939   unsigned ShAmt = 0;
7940   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
7941     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7942       ShAmt = N01->getZExtValue();
7943       // Is the shift amount a multiple of size of VT?
7944       if ((ShAmt & (EVTBits-1)) == 0) {
7945         N0 = N0.getOperand(0);
7946         // Is the load width a multiple of size of VT?
7947         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
7948           return SDValue();
7949       }
7950
7951       // At this point, we must have a load or else we can't do the transform.
7952       if (!isa<LoadSDNode>(N0)) return SDValue();
7953
7954       // Because a SRL must be assumed to *need* to zero-extend the high bits
7955       // (as opposed to anyext the high bits), we can't combine the zextload
7956       // lowering of SRL and an sextload.
7957       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
7958         return SDValue();
7959
7960       // If the shift amount is larger than the input type then we're not
7961       // accessing any of the loaded bytes.  If the load was a zextload/extload
7962       // then the result of the shift+trunc is zero/undef (handled elsewhere).
7963       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
7964         return SDValue();
7965     }
7966   }
7967
7968   // If the load is shifted left (and the result isn't shifted back right),
7969   // we can fold the truncate through the shift.
7970   unsigned ShLeftAmt = 0;
7971   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7972       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
7973     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7974       ShLeftAmt = N01->getZExtValue();
7975       N0 = N0.getOperand(0);
7976     }
7977   }
7978
7979   // If we haven't found a load, we can't narrow it.  Don't transform one with
7980   // multiple uses, this would require adding a new load.
7981   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7982     return SDValue();
7983
7984   // Don't change the width of a volatile load.
7985   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7986   if (LN0->isVolatile())
7987     return SDValue();
7988
7989   // Verify that we are actually reducing a load width here.
7990   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7991     return SDValue();
7992
7993   // For the transform to be legal, the load must produce only two values
7994   // (the value loaded and the chain).  Don't transform a pre-increment
7995   // load, for example, which produces an extra value.  Otherwise the
7996   // transformation is not equivalent, and the downstream logic to replace
7997   // uses gets things wrong.
7998   if (LN0->getNumValues() > 2)
7999     return SDValue();
8000
8001   // If the load that we're shrinking is an extload and we're not just
8002   // discarding the extension we can't simply shrink the load. Bail.
8003   // TODO: It would be possible to merge the extensions in some cases.
8004   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
8005       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
8006     return SDValue();
8007
       // Give the target the final say on whether narrowing is worthwhile.
8008   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
8009     return SDValue();
8010
       // Every bail-out is above this point; nothing has been added to or
       // replaced in the DAG yet.
8011   EVT PtrType = N0.getOperand(1).getValueType();
8012
8013   if (PtrType == MVT::Untyped || PtrType.isExtended())
8014     // It's not possible to generate a constant of extended or untyped type.
8015     return SDValue();
8016
8017   // For big endian targets, we need to adjust the offset to the pointer to
8018   // load the correct bytes.
8019   if (DAG.getDataLayout().isBigEndian()) {
8020     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8021     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8022     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8023   }
8024
       // Convert the bit offset into a byte offset and derive the alignment of
       // the narrowed access from the original alignment and that offset.
8025   uint64_t PtrOff = ShAmt / 8;
8026   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8027   SDLoc DL(LN0);
8028   // The original load itself didn't wrap, so an offset within it doesn't.
8029   SDNodeFlags Flags;
8030   Flags.setNoUnsignedWrap(true);
8031   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8032                                PtrType, LN0->getBasePtr(),
8033                                DAG.getConstant(PtrOff, DL, PtrType),
8034                                Flags);
8035   AddToWorklist(NewPtr.getNode());
8036
       // Build the narrow load: a plain load of VT, or an extending load that
       // reads ExtVT from memory and produces VT.
8037   SDValue Load;
8038   if (ExtType == ISD::NON_EXTLOAD)
8039     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8040                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8041                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8042   else
8043     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8044                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8045                           NewAlign, LN0->getMemOperand()->getFlags(),
8046                           LN0->getAAInfo());
8047
8048   // Replace the old load's chain with the new load's chain.
8049   WorklistRemover DeadNodes(*this);
8050   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8051
8052   // Shift the result left, if we've swallowed a left shift.
8053   SDValue Result = Load;
8054   if (ShLeftAmt != 0) {
         // Fall back to VT as the shift-amount type if the preferred type
         // cannot represent ShLeftAmt.
8055     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8056     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8057       ShImmTy = VT;
8058     // If the shift amount is as large as the result size (but, presumably,
8059     // no larger than the source) then the useful bits of the result are
8060     // zero; we can't simply return the shortened shift, because the result
8061     // of that operation is undefined.
8062     SDLoc DL(N0);
8063     if (ShLeftAmt >= VT.getSizeInBits())
8064       Result = DAG.getConstant(0, DL, VT);
8065     else
8066       Result = DAG.getNode(ISD::SHL, DL, VT,
8067                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8068   }
8069
8070   // Return the new loaded value.
8071   return Result;
8072 }
8073
8074 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8075   SDValue N0 = N->getOperand(0);
8076   SDValue N1 = N->getOperand(1);
8077   EVT VT = N->getValueType(0);
8078   EVT EVT = cast<VTSDNode>(N1)->getVT();
8079   unsigned VTBits = VT.getScalarSizeInBits();
8080   unsigned EVTBits = EVT.getScalarSizeInBits();
8081
8082   if (N0.isUndef())
8083     return DAG.getUNDEF(VT);
8084
8085   // fold (sext_in_reg c1) -> c1
8086   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8087     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8088
8089   // If the input is already sign extended, just drop the extension.
8090   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8091     return N0;
8092
8093   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8094   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8095       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8096     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8097                        N0.getOperand(0), N1);
8098
8099   // fold (sext_in_reg (sext x)) -> (sext x)
8100   // fold (sext_in_reg (aext x)) -> (sext x)
8101   // if x is small enough.
8102   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8103     SDValue N00 = N0.getOperand(0);
8104     if (N00.getScalarValueSizeInBits() <= EVTBits &&
8105         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8106       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8107   }
8108
8109   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8110   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8111        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8112        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8113       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8114     if (!LegalOperations ||
8115         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8116       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8117   }
8118
8119   // fold (sext_in_reg (zext x)) -> (sext x)
8120   // iff we are extending the source sign bit.
8121   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8122     SDValue N00 = N0.getOperand(0);
8123     if (N00.getScalarValueSizeInBits() == EVTBits &&
8124         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8125       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8126   }
8127
8128   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8129   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8130     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8131
8132   // fold operands of sext_in_reg based on knowledge that the top bits are not
8133   // demanded.
8134   if (SimplifyDemandedBits(SDValue(N, 0)))
8135     return SDValue(N, 0);
8136
8137   // fold (sext_in_reg (load x)) -> (smaller sextload x)
8138   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8139   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8140     return NarrowLoad;
8141
8142   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8143   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8144   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8145   if (N0.getOpcode() == ISD::SRL) {
8146     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8147       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8148         // We can turn this into an SRA iff the input to the SRL is already sign
8149         // extended enough.
8150         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8151         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8152           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8153                              N0.getOperand(0), N0.getOperand(1));
8154       }
8155   }
8156
8157   // fold (sext_inreg (extload x)) -> (sextload x)
8158   if (ISD::isEXTLoad(N0.getNode()) &&
8159       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8160       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8161       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8162        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8163     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8164     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8165                                      LN0->getChain(),
8166                                      LN0->getBasePtr(), EVT,
8167                                      LN0->getMemOperand());
8168     CombineTo(N, ExtLoad);
8169     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8170     AddToWorklist(ExtLoad.getNode());
8171     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8172   }
8173   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8174   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8175       N0.hasOneUse() &&
8176       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8177       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8178        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8179     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8180     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8181                                      LN0->getChain(),
8182                                      LN0->getBasePtr(), EVT,
8183                                      LN0->getMemOperand());
8184     CombineTo(N, ExtLoad);
8185     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8186     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8187   }
8188
8189   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8190   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8191     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8192                                            N0.getOperand(1), false))
8193       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8194                          BSwap, N1);
8195   }
8196
8197   return SDValue();
8198 }
8199
8200 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8201   SDValue N0 = N->getOperand(0);
8202   EVT VT = N->getValueType(0);
8203
8204   if (N0.isUndef())
8205     return DAG.getUNDEF(VT);
8206
8207   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8208                                               LegalOperations))
8209     return SDValue(Res, 0);
8210
8211   return SDValue();
8212 }
8213
8214 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8215   SDValue N0 = N->getOperand(0);
8216   EVT VT = N->getValueType(0);
8217
8218   if (N0.isUndef())
8219     return DAG.getUNDEF(VT);
8220
8221   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8222                                               LegalOperations))
8223     return SDValue(Res, 0);
8224
8225   return SDValue();
8226 }
8227
8228 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
8229   SDValue N0 = N->getOperand(0);
8230   EVT VT = N->getValueType(0);
8231   bool isLE = DAG.getDataLayout().isLittleEndian();
8232
8233   // noop truncate
8234   if (N0.getValueType() == N->getValueType(0))
8235     return N0;
8236   // fold (truncate c1) -> c1
8237   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8238     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8239   // fold (truncate (truncate x)) -> (truncate x)
8240   if (N0.getOpcode() == ISD::TRUNCATE)
8241     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8242   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8243   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
8244       N0.getOpcode() == ISD::SIGN_EXTEND ||
8245       N0.getOpcode() == ISD::ANY_EXTEND) {
8246     // if the source is smaller than the dest, we still need an extend.
8247     if (N0.getOperand(0).getValueType().bitsLT(VT))
8248       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8249     // if the source is larger than the dest, than we just need the truncate.
8250     if (N0.getOperand(0).getValueType().bitsGT(VT))
8251       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8252     // if the source and dest are the same type, we can drop both the extend
8253     // and the truncate.
8254     return N0.getOperand(0);
8255   }
8256
8257   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8258   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8259     return SDValue();
8260
8261   // Fold extract-and-trunc into a narrow extract. For example:
8262   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8263   //   i32 y = TRUNCATE(i64 x)
8264   //        -- becomes --
8265   //   v16i8 b = BITCAST (v2i64 val)
8266   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8267   //
8268   // Note: We only run this optimization after type legalization (which often
8269   // creates this pattern) and before operation legalization after which
8270   // we need to be more careful about the vector instructions that we generate.
8271   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8272       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8273
8274     EVT VecTy = N0.getOperand(0).getValueType();
8275     EVT ExTy = N0.getValueType();
8276     EVT TrTy = N->getValueType(0);
8277
8278     unsigned NumElem = VecTy.getVectorNumElements();
8279     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8280
8281     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8282     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8283
8284     SDValue EltNo = N0->getOperand(1);
8285     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8286       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8287       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8288       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8289
8290       SDLoc DL(N);
8291       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8292                          DAG.getBitcast(NVT, N0.getOperand(0)),
8293                          DAG.getConstant(Index, DL, IndexTy));
8294     }
8295   }
8296
8297   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8298   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8299     EVT SrcVT = N0.getValueType();
8300     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8301         TLI.isTruncateFree(SrcVT, VT)) {
8302       SDLoc SL(N0);
8303       SDValue Cond = N0.getOperand(0);
8304       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8305       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8306       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8307     }
8308   }
8309
8310   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8311   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8312       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8313       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8314     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
8315       uint64_t Amt = CAmt->getZExtValue();
8316       unsigned Size = VT.getScalarSizeInBits();
8317
8318       if (Amt < Size) {
8319         SDLoc SL(N);
8320         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8321
8322         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8323         return DAG.getNode(ISD::SHL, SL, VT, Trunc,
8324                            DAG.getConstant(Amt, SL, AmtVT));
8325       }
8326     }
8327   }
8328
8329   // Fold a series of buildvector, bitcast, and truncate if possible.
8330   // For example fold
8331   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8332   //   (2xi32 (buildvector x, y)).
8333   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8334       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8335       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8336       N0.getOperand(0).hasOneUse()) {
8337
8338     SDValue BuildVect = N0.getOperand(0);
8339     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8340     EVT TruncVecEltTy = VT.getVectorElementType();
8341
8342     // Check that the element types match.
8343     if (BuildVectEltTy == TruncVecEltTy) {
8344       // Now we only need to compute the offset of the truncated elements.
8345       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8346       unsigned TruncVecNumElts = VT.getVectorNumElements();
8347       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8348
8349       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8350              "Invalid number of elements");
8351
8352       SmallVector<SDValue, 8> Opnds;
8353       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8354         Opnds.push_back(BuildVect.getOperand(i));
8355
8356       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8357     }
8358   }
8359
8360   // See if we can simplify the input to this truncate through knowledge that
8361   // only the low bits are being used.
8362   // For example "trunc (or (shl x, 8), y)" // -> trunc y
8363   // Currently we only perform this optimization on scalars because vectors
8364   // may have different active low bits.
8365   if (!VT.isVector()) {
8366     if (SDValue Shorter =
8367             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
8368                                                      VT.getSizeInBits())))
8369       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8370   }
8371
8372   // fold (truncate (load x)) -> (smaller load x)
8373   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8374   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8375     if (SDValue Reduced = ReduceLoadWidth(N))
8376       return Reduced;
8377
8378     // Handle the case where the load remains an extending load even
8379     // after truncation.
8380     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8381       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8382       if (!LN0->isVolatile() &&
8383           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8384         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8385                                          VT, LN0->getChain(), LN0->getBasePtr(),
8386                                          LN0->getMemoryVT(),
8387                                          LN0->getMemOperand());
8388         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8389         return NewLoad;
8390       }
8391     }
8392   }
8393
8394   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8395   // where ... are all 'undef'.
8396   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8397     SmallVector<EVT, 8> VTs;
8398     SDValue V;
8399     unsigned Idx = 0;
8400     unsigned NumDefs = 0;
8401
8402     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8403       SDValue X = N0.getOperand(i);
8404       if (!X.isUndef()) {
8405         V = X;
8406         Idx = i;
8407         NumDefs++;
8408       }
8409       // Stop if more than one members are non-undef.
8410       if (NumDefs > 1)
8411         break;
8412       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8413                                      VT.getVectorElementType(),
8414                                      X.getValueType().getVectorNumElements()));
8415     }
8416
8417     if (NumDefs == 0)
8418       return DAG.getUNDEF(VT);
8419
8420     if (NumDefs == 1) {
8421       assert(V.getNode() && "The single defined operand is empty!");
8422       SmallVector<SDValue, 8> Opnds;
8423       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8424         if (i != Idx) {
8425           Opnds.push_back(DAG.getUNDEF(VTs[i]));
8426           continue;
8427         }
8428         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8429         AddToWorklist(NV.getNode());
8430         Opnds.push_back(NV);
8431       }
8432       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8433     }
8434   }
8435
8436   // Fold truncate of a bitcast of a vector to an extract of the low vector
8437   // element.
8438   //
8439   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
8440   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8441     SDValue VecSrc = N0.getOperand(0);
8442     EVT SrcVT = VecSrc.getValueType();
8443     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8444         (!LegalOperations ||
8445          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8446       SDLoc SL(N);
8447
8448       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8449       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8450                          VecSrc, DAG.getConstant(0, SL, IdxVT));
8451     }
8452   }
8453
8454   // Simplify the operands using demanded-bits information.
8455   if (!VT.isVector() &&
8456       SimplifyDemandedBits(SDValue(N, 0)))
8457     return SDValue(N, 0);
8458
8459   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8460   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
8461   // When the adde's carry is not used.
8462   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
8463       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
8464       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
8465     SDLoc SL(N);
8466     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8467     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8468     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
8469     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
8470   }
8471
8472   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8473     return NewVSel;
8474
8475   return SDValue();
8476 }
8477
8478 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8479   SDValue Elt = N->getOperand(i);
8480   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8481     return Elt.getNode();
8482   return Elt.getOperand(Elt.getResNo()).getNode();
8483 }
8484
8485 /// build_pair (load, load) -> load
8486 /// if load locations are consecutive.
8487 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8488   assert(N->getOpcode() == ISD::BUILD_PAIR);
8489
8490   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8491   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8492   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8493       LD1->getAddressSpace() != LD2->getAddressSpace())
8494     return SDValue();
8495   EVT LD1VT = LD1->getValueType(0);
8496   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8497   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8498       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8499     unsigned Align = LD1->getAlignment();
8500     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8501         VT.getTypeForEVT(*DAG.getContext()));
8502
8503     if (NewAlign <= Align &&
8504         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8505       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8506                          LD1->getPointerInfo(), Align);
8507   }
8508
8509   return SDValue();
8510 }
8511
8512 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8513   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8514   // and Lo parts; on big-endian machines it doesn't.
8515   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8516 }
8517
8518 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8519                                     const TargetLowering &TLI) {
8520   // If this is not a bitcast to an FP type or if the target doesn't have
8521   // IEEE754-compliant FP logic, we're done.
8522   EVT VT = N->getValueType(0);
8523   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8524     return SDValue();
8525
8526   // TODO: Use splat values for the constant-checking below and remove this
8527   // restriction.
8528   SDValue N0 = N->getOperand(0);
8529   EVT SourceVT = N0.getValueType();
8530   if (SourceVT.isVector())
8531     return SDValue();
8532
8533   unsigned FPOpcode;
8534   APInt SignMask;
8535   switch (N0.getOpcode()) {
8536   case ISD::AND:
8537     FPOpcode = ISD::FABS;
8538     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8539     break;
8540   case ISD::XOR:
8541     FPOpcode = ISD::FNEG;
8542     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8543     break;
8544   // TODO: ISD::OR --> ISD::FNABS?
8545   default:
8546     return SDValue();
8547   }
8548
8549   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8550   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8551   SDValue LogicOp0 = N0.getOperand(0);
8552   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8553   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8554       LogicOp0.getOpcode() == ISD::BITCAST &&
8555       LogicOp0->getOperand(0).getValueType() == VT)
8556     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8557
8558   return SDValue();
8559 }
8560
8561 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8562   SDValue N0 = N->getOperand(0);
8563   EVT VT = N->getValueType(0);
8564
8565   if (N0.isUndef())
8566     return DAG.getUNDEF(VT);
8567
8568   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8569   // Only do this before legalize, since afterward the target may be depending
8570   // on the bitconvert.
8571   // First check to see if this is all constant.
8572   if (!LegalTypes &&
8573       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8574       VT.isVector()) {
8575     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8576
8577     EVT DestEltVT = N->getValueType(0).getVectorElementType();
8578     assert(!DestEltVT.isVector() &&
8579            "Element type of vector ValueType must not be vector!");
8580     if (isSimple)
8581       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8582   }
8583
8584   // If the input is a constant, let getNode fold it.
8585   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8586     // If we can't allow illegal operations, we need to check that this is just
8587     // a fp -> int or int -> conversion and that the resulting operation will
8588     // be legal.
8589     if (!LegalOperations ||
8590         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8591          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8592         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8593          TLI.isOperationLegal(ISD::Constant, VT)))
8594       return DAG.getBitcast(VT, N0);
8595   }
8596
8597   // (conv (conv x, t1), t2) -> (conv x, t2)
8598   if (N0.getOpcode() == ISD::BITCAST)
8599     return DAG.getBitcast(VT, N0.getOperand(0));
8600
8601   // fold (conv (load x)) -> (load (conv*)x)
8602   // If the resultant load doesn't need a higher alignment than the original!
8603   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8604       // Do not change the width of a volatile load.
8605       !cast<LoadSDNode>(N0)->isVolatile() &&
8606       // Do not remove the cast if the types differ in endian layout.
8607       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8608           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8609       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8610       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8611     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8612     unsigned OrigAlign = LN0->getAlignment();
8613
8614     bool Fast = false;
8615     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8616                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
8617         Fast) {
8618       SDValue Load =
8619           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8620                       LN0->getPointerInfo(), OrigAlign,
8621                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8622       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8623       return Load;
8624     }
8625   }
8626
8627   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8628     return V;
8629
8630   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8631   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8632   //
8633   // For ppc_fp128:
8634   // fold (bitcast (fneg x)) ->
8635   //     flipbit = signbit
8636   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8637   //
8638   // fold (bitcast (fabs x)) ->
8639   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8640   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8641   // This often reduces constant pool loads.
8642   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
8643        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
8644       N0.getNode()->hasOneUse() && VT.isInteger() &&
8645       !VT.isVector() && !N0.getValueType().isVector()) {
8646     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8647     AddToWorklist(NewConv.getNode());
8648
8649     SDLoc DL(N);
8650     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8651       assert(VT.getSizeInBits() == 128);
8652       SDValue SignBit = DAG.getConstant(
8653           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8654       SDValue FlipBit;
8655       if (N0.getOpcode() == ISD::FNEG) {
8656         FlipBit = SignBit;
8657         AddToWorklist(FlipBit.getNode());
8658       } else {
8659         assert(N0.getOpcode() == ISD::FABS);
8660         SDValue Hi =
8661             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8662                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8663                                               SDLoc(NewConv)));
8664         AddToWorklist(Hi.getNode());
8665         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8666         AddToWorklist(FlipBit.getNode());
8667       }
8668       SDValue FlipBits =
8669           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8670       AddToWorklist(FlipBits.getNode());
8671       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8672     }
8673     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8674     if (N0.getOpcode() == ISD::FNEG)
8675       return DAG.getNode(ISD::XOR, DL, VT,
8676                          NewConv, DAG.getConstant(SignBit, DL, VT));
8677     assert(N0.getOpcode() == ISD::FABS);
8678     return DAG.getNode(ISD::AND, DL, VT,
8679                        NewConv, DAG.getConstant(~SignBit, DL, VT));
8680   }
8681
8682   // fold (bitconvert (fcopysign cst, x)) ->
8683   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8684   // Note that we don't handle (copysign x, cst) because this can always be
8685   // folded to an fneg or fabs.
8686   //
8687   // For ppc_fp128:
8688   // fold (bitcast (fcopysign cst, x)) ->
8689   //     flipbit = (and (extract_element
8690   //                     (xor (bitcast cst), (bitcast x)), 0),
8691   //                    signbit)
8692   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8693   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
8694       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8695       VT.isInteger() && !VT.isVector()) {
8696     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8697     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8698     if (isTypeLegal(IntXVT)) {
8699       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8700       AddToWorklist(X.getNode());
8701
8702       // If X has a different width than the result/lhs, sext it or truncate it.
8703       unsigned VTWidth = VT.getSizeInBits();
8704       if (OrigXWidth < VTWidth) {
8705         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8706         AddToWorklist(X.getNode());
8707       } else if (OrigXWidth > VTWidth) {
8708         // To get the sign bit in the right place, we have to shift it right
8709         // before truncating.
8710         SDLoc DL(X);
8711         X = DAG.getNode(ISD::SRL, DL,
8712                         X.getValueType(), X,
8713                         DAG.getConstant(OrigXWidth-VTWidth, DL,
8714                                         X.getValueType()));
8715         AddToWorklist(X.getNode());
8716         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
8717         AddToWorklist(X.getNode());
8718       }
8719
8720       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8721         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
8722         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8723         AddToWorklist(Cst.getNode());
8724         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
8725         AddToWorklist(X.getNode());
8726         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
8727         AddToWorklist(XorResult.getNode());
8728         SDValue XorResult64 = DAG.getNode(
8729             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
8730             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8731                                   SDLoc(XorResult)));
8732         AddToWorklist(XorResult64.getNode());
8733         SDValue FlipBit =
8734             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
8735                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
8736         AddToWorklist(FlipBit.getNode());
8737         SDValue FlipBits =
8738             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8739         AddToWorklist(FlipBits.getNode());
8740         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
8741       }
8742       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8743       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
8744                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
8745       AddToWorklist(X.getNode());
8746
8747       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8748       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
8749                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
8750       AddToWorklist(Cst.getNode());
8751
8752       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
8753     }
8754   }
8755
8756   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
8757   if (N0.getOpcode() == ISD::BUILD_PAIR)
8758     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
8759       return CombineLD;
8760
8761   // Remove double bitcasts from shuffles - this is often a legacy of
8762   // XformToShuffleWithZero being used to combine bitmaskings (of
8763   // float vectors bitcast to integer vectors) into shuffles.
8764   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
8765   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
8766       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
8767       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
8768       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
8769     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
8770
8771     // If operands are a bitcast, peek through if it casts the original VT.
8772     // If operands are a constant, just bitcast back to original VT.
8773     auto PeekThroughBitcast = [&](SDValue Op) {
8774       if (Op.getOpcode() == ISD::BITCAST &&
8775           Op.getOperand(0).getValueType() == VT)
8776         return SDValue(Op.getOperand(0));
8777       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
8778           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
8779         return DAG.getBitcast(VT, Op);
8780       return SDValue();
8781     };
8782
8783     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
8784     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
8785     if (!(SV0 && SV1))
8786       return SDValue();
8787
8788     int MaskScale =
8789         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
8790     SmallVector<int, 8> NewMask;
8791     for (int M : SVN->getMask())
8792       for (int i = 0; i != MaskScale; ++i)
8793         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
8794
8795     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8796     if (!LegalMask) {
8797       std::swap(SV0, SV1);
8798       ShuffleVectorSDNode::commuteMask(NewMask);
8799       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8800     }
8801
8802     if (LegalMask)
8803       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
8804   }
8805
8806   return SDValue();
8807 }
8808
8809 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8810   EVT VT = N->getValueType(0);
8811   return CombineConsecutiveLoads(N, VT);
8812 }
8813
8814 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8815 /// operands. DstEltVT indicates the destination element value type.
8816 SDValue DAGCombiner::
8817 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8818   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8819
8820   // If this is already the right type, we're done.
8821   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8822
8823   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8824   unsigned DstBitSize = DstEltVT.getSizeInBits();
8825
8826   // If this is a conversion of N elements of one type to N elements of another
8827   // type, convert each element.  This handles FP<->INT cases.
8828   if (SrcBitSize == DstBitSize) {
8829     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8830                               BV->getValueType(0).getVectorNumElements());
8831
8832     // Due to the FP element handling below calling this routine recursively,
8833     // we can end up with a scalar-to-vector node here.
8834     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8835       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8836                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8837
8838     SmallVector<SDValue, 8> Ops;
8839     for (SDValue Op : BV->op_values()) {
8840       // If the vector element type is not legal, the BUILD_VECTOR operands
8841       // are promoted and implicitly truncated.  Make that explicit here.
8842       if (Op.getValueType() != SrcEltVT)
8843         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8844       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8845       AddToWorklist(Ops.back().getNode());
8846     }
8847     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8848   }
8849
8850   // Otherwise, we're growing or shrinking the elements.  To avoid having to
8851   // handle annoying details of growing/shrinking FP values, we convert them to
8852   // int first.
8853   if (SrcEltVT.isFloatingPoint()) {
8854     // Convert the input float vector to a int vector where the elements are the
8855     // same sizes.
8856     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8857     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8858     SrcEltVT = IntVT;
8859   }
8860
8861   // Now we know the input is an integer vector.  If the output is a FP type,
8862   // convert to integer first, then to FP of the right size.
8863   if (DstEltVT.isFloatingPoint()) {
8864     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8865     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8866
8867     // Next, convert to FP elements of the same size.
8868     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8869   }
8870
8871   SDLoc DL(BV);
8872
8873   // Okay, we know the src/dst types are both integers of differing types.
8874   // Handling growing first.
8875   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8876   if (SrcBitSize < DstBitSize) {
8877     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
8878
8879     SmallVector<SDValue, 8> Ops;
8880     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
8881          i += NumInputsPerOutput) {
8882       bool isLE = DAG.getDataLayout().isLittleEndian();
8883       APInt NewBits = APInt(DstBitSize, 0);
8884       bool EltIsUndef = true;
8885       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
8886         // Shift the previously computed bits over.
8887         NewBits <<= SrcBitSize;
8888         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
8889         if (Op.isUndef()) continue;
8890         EltIsUndef = false;
8891
8892         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8893                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
8894       }
8895
8896       if (EltIsUndef)
8897         Ops.push_back(DAG.getUNDEF(DstEltVT));
8898       else
8899         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8900     }
8901
8902     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8903     return DAG.getBuildVector(VT, DL, Ops);
8904   }
8905
8906   // Finally, this must be the case where we are shrinking elements: each input
8907   // turns into multiple outputs.
8908   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
8909   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8910                             NumOutputsPerInput*BV->getNumOperands());
8911   SmallVector<SDValue, 8> Ops;
8912
8913   for (const SDValue &Op : BV->op_values()) {
8914     if (Op.isUndef()) {
8915       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
8916       continue;
8917     }
8918
8919     APInt OpVal = cast<ConstantSDNode>(Op)->
8920                   getAPIntValue().zextOrTrunc(SrcBitSize);
8921
8922     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
8923       APInt ThisVal = OpVal.trunc(DstBitSize);
8924       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
8925       OpVal.lshrInPlace(DstBitSize);
8926     }
8927
8928     // For big endian targets, swap the order of the pieces of each element.
8929     if (DAG.getDataLayout().isBigEndian())
8930       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
8931   }
8932
8933   return DAG.getBuildVector(VT, DL, Ops);
8934 }
8935
8936 static bool isContractable(SDNode *N) {
8937   SDNodeFlags F = N->getFlags();
8938   return F.hasAllowContract() || F.hasUnsafeAlgebra();
8939 }
8940
8941 /// Try to perform FMA combining on a given FADD node.
8942 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
8943   SDValue N0 = N->getOperand(0);
8944   SDValue N1 = N->getOperand(1);
8945   EVT VT = N->getValueType(0);
8946   SDLoc SL(N);
8947
8948   const TargetOptions &Options = DAG.getTarget().Options;
8949
8950   // Floating-point multiply-add with intermediate rounding.
8951   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8952
8953   // Floating-point multiply-add without intermediate rounding.
8954   bool HasFMA =
8955       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8956       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8957
8958   // No valid opcode, do not combine.
8959   if (!HasFMAD && !HasFMA)
8960     return SDValue();
8961
8962   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
8963                               Options.UnsafeFPMath || HasFMAD);
8964   // If the addition is not contractable, do not combine.
8965   if (!AllowFusionGlobally && !isContractable(N))
8966     return SDValue();
8967
8968   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8969   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
8970     return SDValue();
8971
8972   // Always prefer FMAD to FMA for precision.
8973   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8974   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8975   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8976
8977   // Is the node an FMUL and contractable either due to global flags or
8978   // SDNodeFlags.
8979   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
8980     if (N.getOpcode() != ISD::FMUL)
8981       return false;
8982     return AllowFusionGlobally || isContractable(N.getNode());
8983   };
8984   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
8985   // prefer to fold the multiply with fewer uses.
8986   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
8987     if (N0.getNode()->use_size() > N1.getNode()->use_size())
8988       std::swap(N0, N1);
8989   }
8990
8991   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
8992   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
8993     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8994                        N0.getOperand(0), N0.getOperand(1), N1);
8995   }
8996
8997   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
8998   // Note: Commutes FADD operands.
8999   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
9000     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9001                        N1.getOperand(0), N1.getOperand(1), N0);
9002   }
9003
9004   // Look through FP_EXTEND nodes to do more combining.
9005   if (LookThroughFPExt) {
9006     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
9007     if (N0.getOpcode() == ISD::FP_EXTEND) {
9008       SDValue N00 = N0.getOperand(0);
9009       if (isContractableFMUL(N00))
9010         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9011                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9012                                        N00.getOperand(0)),
9013                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9014                                        N00.getOperand(1)), N1);
9015     }
9016
9017     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
9018     // Note: Commutes FADD operands.
9019     if (N1.getOpcode() == ISD::FP_EXTEND) {
9020       SDValue N10 = N1.getOperand(0);
9021       if (isContractableFMUL(N10))
9022         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9023                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9024                                        N10.getOperand(0)),
9025                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9026                                        N10.getOperand(1)), N0);
9027     }
9028   }
9029
9030   // More folding opportunities when target permits.
9031   if (Aggressive) {
9032     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
9033     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9034     // are currently only supported on binary nodes.
9035     if (Options.UnsafeFPMath &&
9036         N0.getOpcode() == PreferredFusedOpcode &&
9037         N0.getOperand(2).getOpcode() == ISD::FMUL &&
9038         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
9039       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9040                          N0.getOperand(0), N0.getOperand(1),
9041                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9042                                      N0.getOperand(2).getOperand(0),
9043                                      N0.getOperand(2).getOperand(1),
9044                                      N1));
9045     }
9046
9047     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
9048     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9049     // are currently only supported on binary nodes.
9050     if (Options.UnsafeFPMath &&
9051         N1->getOpcode() == PreferredFusedOpcode &&
9052         N1.getOperand(2).getOpcode() == ISD::FMUL &&
9053         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
9054       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9055                          N1.getOperand(0), N1.getOperand(1),
9056                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9057                                      N1.getOperand(2).getOperand(0),
9058                                      N1.getOperand(2).getOperand(1),
9059                                      N0));
9060     }
9061
9062     if (LookThroughFPExt) {
9063       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
9064       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
9065       auto FoldFAddFMAFPExtFMul = [&] (
9066           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9067         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
9068                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9069                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9070                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9071                                        Z));
9072       };
9073       if (N0.getOpcode() == PreferredFusedOpcode) {
9074         SDValue N02 = N0.getOperand(2);
9075         if (N02.getOpcode() == ISD::FP_EXTEND) {
9076           SDValue N020 = N02.getOperand(0);
9077           if (isContractableFMUL(N020))
9078             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
9079                                         N020.getOperand(0), N020.getOperand(1),
9080                                         N1);
9081         }
9082       }
9083
9084       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
9085       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
9086       // FIXME: This turns two single-precision and one double-precision
9087       // operation into two double-precision operations, which might not be
9088       // interesting for all targets, especially GPUs.
9089       auto FoldFAddFPExtFMAFMul = [&] (
9090           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9091         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9092                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
9093                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
9094                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9095                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9096                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9097                                        Z));
9098       };
9099       if (N0.getOpcode() == ISD::FP_EXTEND) {
9100         SDValue N00 = N0.getOperand(0);
9101         if (N00.getOpcode() == PreferredFusedOpcode) {
9102           SDValue N002 = N00.getOperand(2);
9103           if (isContractableFMUL(N002))
9104             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
9105                                         N002.getOperand(0), N002.getOperand(1),
9106                                         N1);
9107         }
9108       }
9109
9110       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
9111       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
9112       if (N1.getOpcode() == PreferredFusedOpcode) {
9113         SDValue N12 = N1.getOperand(2);
9114         if (N12.getOpcode() == ISD::FP_EXTEND) {
9115           SDValue N120 = N12.getOperand(0);
9116           if (isContractableFMUL(N120))
9117             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
9118                                         N120.getOperand(0), N120.getOperand(1),
9119                                         N0);
9120         }
9121       }
9122
9123       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
9124       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
9125       // FIXME: This turns two single-precision and one double-precision
9126       // operation into two double-precision operations, which might not be
9127       // interesting for all targets, especially GPUs.
9128       if (N1.getOpcode() == ISD::FP_EXTEND) {
9129         SDValue N10 = N1.getOperand(0);
9130         if (N10.getOpcode() == PreferredFusedOpcode) {
9131           SDValue N102 = N10.getOperand(2);
9132           if (isContractableFMUL(N102))
9133             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
9134                                         N102.getOperand(0), N102.getOperand(1),
9135                                         N0);
9136         }
9137       }
9138     }
9139   }
9140
9141   return SDValue();
9142 }
9143
9144 /// Try to perform FMA combining on a given FSUB node.
9145 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9146   SDValue N0 = N->getOperand(0);
9147   SDValue N1 = N->getOperand(1);
9148   EVT VT = N->getValueType(0);
9149   SDLoc SL(N);
9150
9151   const TargetOptions &Options = DAG.getTarget().Options;
9152   // Floating-point multiply-add with intermediate rounding.
9153   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9154
9155   // Floating-point multiply-add without intermediate rounding.
9156   bool HasFMA =
9157       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9158       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9159
9160   // No valid opcode, do not combine.
9161   if (!HasFMAD && !HasFMA)
9162     return SDValue();
9163
9164   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9165                               Options.UnsafeFPMath || HasFMAD);
9166   // If the subtraction is not contractable, do not combine.
9167   if (!AllowFusionGlobally && !isContractable(N))
9168     return SDValue();
9169
9170   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9171   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9172     return SDValue();
9173
9174   // Always prefer FMAD to FMA for precision.
9175   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9176   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9177   bool LookThroughFPExt = TLI.isFPExtFree(VT);
9178
9179   // Is the node an FMUL and contractable either due to global flags or
9180   // SDNodeFlags.
9181   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9182     if (N.getOpcode() != ISD::FMUL)
9183       return false;
9184     return AllowFusionGlobally || isContractable(N.getNode());
9185   };
9186
9187   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9188   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9189     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9190                        N0.getOperand(0), N0.getOperand(1),
9191                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9192   }
9193
9194   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9195   // Note: Commutes FSUB operands.
9196   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9197     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9198                        DAG.getNode(ISD::FNEG, SL, VT,
9199                                    N1.getOperand(0)),
9200                        N1.getOperand(1), N0);
9201
9202   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9203   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9204       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9205     SDValue N00 = N0.getOperand(0).getOperand(0);
9206     SDValue N01 = N0.getOperand(0).getOperand(1);
9207     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9208                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9209                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9210   }
9211
9212   // Look through FP_EXTEND nodes to do more combining.
9213   if (LookThroughFPExt) {
9214     // fold (fsub (fpext (fmul x, y)), z)
9215     //   -> (fma (fpext x), (fpext y), (fneg z))
9216     if (N0.getOpcode() == ISD::FP_EXTEND) {
9217       SDValue N00 = N0.getOperand(0);
9218       if (isContractableFMUL(N00))
9219         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9220                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9221                                        N00.getOperand(0)),
9222                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9223                                        N00.getOperand(1)),
9224                            DAG.getNode(ISD::FNEG, SL, VT, N1));
9225     }
9226
9227     // fold (fsub x, (fpext (fmul y, z)))
9228     //   -> (fma (fneg (fpext y)), (fpext z), x)
9229     // Note: Commutes FSUB operands.
9230     if (N1.getOpcode() == ISD::FP_EXTEND) {
9231       SDValue N10 = N1.getOperand(0);
9232       if (isContractableFMUL(N10))
9233         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9234                            DAG.getNode(ISD::FNEG, SL, VT,
9235                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9236                                                    N10.getOperand(0))),
9237                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9238                                        N10.getOperand(1)),
9239                            N0);
9240     }
9241
9242     // fold (fsub (fpext (fneg (fmul, x, y))), z)
9243     //   -> (fneg (fma (fpext x), (fpext y), z))
9244     // Note: This could be removed with appropriate canonicalization of the
9245     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9246     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9247     // from implementing the canonicalization in visitFSUB.
9248     if (N0.getOpcode() == ISD::FP_EXTEND) {
9249       SDValue N00 = N0.getOperand(0);
9250       if (N00.getOpcode() == ISD::FNEG) {
9251         SDValue N000 = N00.getOperand(0);
9252         if (isContractableFMUL(N000)) {
9253           return DAG.getNode(ISD::FNEG, SL, VT,
9254                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9255                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9256                                                      N000.getOperand(0)),
9257                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9258                                                      N000.getOperand(1)),
9259                                          N1));
9260         }
9261       }
9262     }
9263
9264     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9265     //   -> (fneg (fma (fpext x)), (fpext y), z)
9266     // Note: This could be removed with appropriate canonicalization of the
9267     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9268     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9269     // from implementing the canonicalization in visitFSUB.
9270     if (N0.getOpcode() == ISD::FNEG) {
9271       SDValue N00 = N0.getOperand(0);
9272       if (N00.getOpcode() == ISD::FP_EXTEND) {
9273         SDValue N000 = N00.getOperand(0);
9274         if (isContractableFMUL(N000)) {
9275           return DAG.getNode(ISD::FNEG, SL, VT,
9276                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9277                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9278                                                      N000.getOperand(0)),
9279                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9280                                                      N000.getOperand(1)),
9281                                          N1));
9282         }
9283       }
9284     }
9285
9286   }
9287
9288   // More folding opportunities when target permits.
9289   if (Aggressive) {
9290     // fold (fsub (fma x, y, (fmul u, v)), z)
9291     //   -> (fma x, y (fma u, v, (fneg z)))
9292     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9293     // are currently only supported on binary nodes.
9294     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9295         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9296         N0.getOperand(2)->hasOneUse()) {
9297       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9298                          N0.getOperand(0), N0.getOperand(1),
9299                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9300                                      N0.getOperand(2).getOperand(0),
9301                                      N0.getOperand(2).getOperand(1),
9302                                      DAG.getNode(ISD::FNEG, SL, VT,
9303                                                  N1)));
9304     }
9305
9306     // fold (fsub x, (fma y, z, (fmul u, v)))
9307     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9308     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9309     // are currently only supported on binary nodes.
9310     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9311         isContractableFMUL(N1.getOperand(2))) {
9312       SDValue N20 = N1.getOperand(2).getOperand(0);
9313       SDValue N21 = N1.getOperand(2).getOperand(1);
9314       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9315                          DAG.getNode(ISD::FNEG, SL, VT,
9316                                      N1.getOperand(0)),
9317                          N1.getOperand(1),
9318                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9319                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9320
9321                                      N21, N0));
9322     }
9323
9324     if (LookThroughFPExt) {
9325       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9326       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9327       if (N0.getOpcode() == PreferredFusedOpcode) {
9328         SDValue N02 = N0.getOperand(2);
9329         if (N02.getOpcode() == ISD::FP_EXTEND) {
9330           SDValue N020 = N02.getOperand(0);
9331           if (isContractableFMUL(N020))
9332             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9333                                N0.getOperand(0), N0.getOperand(1),
9334                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9335                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9336                                                        N020.getOperand(0)),
9337                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9338                                                        N020.getOperand(1)),
9339                                            DAG.getNode(ISD::FNEG, SL, VT,
9340                                                        N1)));
9341         }
9342       }
9343
9344       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9345       //   -> (fma (fpext x), (fpext y),
9346       //           (fma (fpext u), (fpext v), (fneg z)))
9347       // FIXME: This turns two single-precision and one double-precision
9348       // operation into two double-precision operations, which might not be
9349       // interesting for all targets, especially GPUs.
9350       if (N0.getOpcode() == ISD::FP_EXTEND) {
9351         SDValue N00 = N0.getOperand(0);
9352         if (N00.getOpcode() == PreferredFusedOpcode) {
9353           SDValue N002 = N00.getOperand(2);
9354           if (isContractableFMUL(N002))
9355             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9356                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9357                                            N00.getOperand(0)),
9358                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9359                                            N00.getOperand(1)),
9360                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9361                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9362                                                        N002.getOperand(0)),
9363                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9364                                                        N002.getOperand(1)),
9365                                            DAG.getNode(ISD::FNEG, SL, VT,
9366                                                        N1)));
9367         }
9368       }
9369
9370       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9371       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9372       if (N1.getOpcode() == PreferredFusedOpcode &&
9373         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9374         SDValue N120 = N1.getOperand(2).getOperand(0);
9375         if (isContractableFMUL(N120)) {
9376           SDValue N1200 = N120.getOperand(0);
9377           SDValue N1201 = N120.getOperand(1);
9378           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9379                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9380                              N1.getOperand(1),
9381                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9382                                          DAG.getNode(ISD::FNEG, SL, VT,
9383                                              DAG.getNode(ISD::FP_EXTEND, SL,
9384                                                          VT, N1200)),
9385                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9386                                                      N1201),
9387                                          N0));
9388         }
9389       }
9390
9391       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9392       //   -> (fma (fneg (fpext y)), (fpext z),
9393       //           (fma (fneg (fpext u)), (fpext v), x))
9394       // FIXME: This turns two single-precision and one double-precision
9395       // operation into two double-precision operations, which might not be
9396       // interesting for all targets, especially GPUs.
9397       if (N1.getOpcode() == ISD::FP_EXTEND &&
9398         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9399         SDValue N100 = N1.getOperand(0).getOperand(0);
9400         SDValue N101 = N1.getOperand(0).getOperand(1);
9401         SDValue N102 = N1.getOperand(0).getOperand(2);
9402         if (isContractableFMUL(N102)) {
9403           SDValue N1020 = N102.getOperand(0);
9404           SDValue N1021 = N102.getOperand(1);
9405           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9406                              DAG.getNode(ISD::FNEG, SL, VT,
9407                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9408                                                      N100)),
9409                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9410                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9411                                          DAG.getNode(ISD::FNEG, SL, VT,
9412                                              DAG.getNode(ISD::FP_EXTEND, SL,
9413                                                          VT, N1020)),
9414                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9415                                                      N1021),
9416                                          N0));
9417         }
9418       }
9419     }
9420   }
9421
9422   return SDValue();
9423 }
9424
9425 /// Try to perform FMA combining on a given FMUL node based on the distributive
9426 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9427 /// subtraction instead of addition).
9428 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9429   SDValue N0 = N->getOperand(0);
9430   SDValue N1 = N->getOperand(1);
9431   EVT VT = N->getValueType(0);
9432   SDLoc SL(N);
9433
9434   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9435
9436   const TargetOptions &Options = DAG.getTarget().Options;
9437
9438   // The transforms below are incorrect when x == 0 and y == inf, because the
9439   // intermediate multiplication produces a nan.
9440   if (!Options.NoInfsFPMath)
9441     return SDValue();
9442
9443   // Floating-point multiply-add without intermediate rounding.
9444   bool HasFMA =
9445       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9446       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9447       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9448
9449   // Floating-point multiply-add with intermediate rounding. This can result
9450   // in a less precise result due to the changed rounding order.
9451   bool HasFMAD = Options.UnsafeFPMath &&
9452                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9453
9454   // No valid opcode, do not combine.
9455   if (!HasFMAD && !HasFMA)
9456     return SDValue();
9457
9458   // Always prefer FMAD to FMA for precision.
9459   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9460   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9461
9462   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9463   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9464   auto FuseFADD = [&](SDValue X, SDValue Y) {
9465     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9466       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9467       if (XC1 && XC1->isExactlyValue(+1.0))
9468         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9469       if (XC1 && XC1->isExactlyValue(-1.0))
9470         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9471                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9472     }
9473     return SDValue();
9474   };
9475
9476   if (SDValue FMA = FuseFADD(N0, N1))
9477     return FMA;
9478   if (SDValue FMA = FuseFADD(N1, N0))
9479     return FMA;
9480
9481   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9482   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9483   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9484   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9485   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9486     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9487       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9488       if (XC0 && XC0->isExactlyValue(+1.0))
9489         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9490                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9491                            Y);
9492       if (XC0 && XC0->isExactlyValue(-1.0))
9493         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9494                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9495                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9496
9497       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9498       if (XC1 && XC1->isExactlyValue(+1.0))
9499         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9500                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9501       if (XC1 && XC1->isExactlyValue(-1.0))
9502         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9503     }
9504     return SDValue();
9505   };
9506
9507   if (SDValue FMA = FuseFSUB(N0, N1))
9508     return FMA;
9509   if (SDValue FMA = FuseFSUB(N1, N0))
9510     return FMA;
9511
9512   return SDValue();
9513 }
9514
9515 static bool isFMulNegTwo(SDValue &N) {
9516   if (N.getOpcode() != ISD::FMUL)
9517     return false;
9518   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9519     return CFP->isExactlyValue(-2.0);
9520   return false;
9521 }
9522
9523 SDValue DAGCombiner::visitFADD(SDNode *N) {
9524   SDValue N0 = N->getOperand(0);
9525   SDValue N1 = N->getOperand(1);
9526   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9527   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9528   EVT VT = N->getValueType(0);
9529   SDLoc DL(N);
9530   const TargetOptions &Options = DAG.getTarget().Options;
9531   const SDNodeFlags Flags = N->getFlags();
9532
9533   // fold vector ops
9534   if (VT.isVector())
9535     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9536       return FoldedVOp;
9537
9538   // fold (fadd c1, c2) -> c1 + c2
9539   if (N0CFP && N1CFP)
9540     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9541
9542   // canonicalize constant to RHS
9543   if (N0CFP && !N1CFP)
9544     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9545
9546   if (SDValue NewSel = foldBinOpIntoSelect(N))
9547     return NewSel;
9548
9549   // fold (fadd A, (fneg B)) -> (fsub A, B)
9550   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9551       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9552     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9553                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9554
9555   // fold (fadd (fneg A), B) -> (fsub B, A)
9556   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9557       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9558     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9559                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9560
9561   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9562   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9563   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9564       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9565     bool N1IsFMul = isFMulNegTwo(N1);
9566     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9567     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9568     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9569   }
9570
9571   // FIXME: Auto-upgrade the target/function-level option.
9572   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9573     // fold (fadd A, 0) -> A
9574     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9575       if (N1C->isZero())
9576         return N0;
9577   }
9578
9579   // If 'unsafe math' is enabled, fold lots of things.
9580   if (Options.UnsafeFPMath) {
9581     // No FP constant should be created after legalization as Instruction
9582     // Selection pass has a hard time dealing with FP constants.
9583     bool AllowNewConst = (Level < AfterLegalizeDAG);
9584
9585     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9586     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9587         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9588       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9589                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9590                                      Flags),
9591                          Flags);
9592
9593     // If allowed, fold (fadd (fneg x), x) -> 0.0
9594     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9595       return DAG.getConstantFP(0.0, DL, VT);
9596
9597     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9598     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9599       return DAG.getConstantFP(0.0, DL, VT);
9600
9601     // We can fold chains of FADD's of the same value into multiplications.
9602     // This transform is not safe in general because we are reducing the number
9603     // of rounding steps.
9604     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9605       if (N0.getOpcode() == ISD::FMUL) {
9606         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9607         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9608
9609         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9610         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9611           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9612                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9613           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9614         }
9615
9616         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9617         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9618             N1.getOperand(0) == N1.getOperand(1) &&
9619             N0.getOperand(0) == N1.getOperand(0)) {
9620           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9621                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9622           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9623         }
9624       }
9625
9626       if (N1.getOpcode() == ISD::FMUL) {
9627         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9628         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9629
9630         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9631         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9632           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9633                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9634           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9635         }
9636
9637         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9638         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9639             N0.getOperand(0) == N0.getOperand(1) &&
9640             N1.getOperand(0) == N0.getOperand(0)) {
9641           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9642                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9643           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9644         }
9645       }
9646
9647       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9648         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9649         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9650         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9651             (N0.getOperand(0) == N1)) {
9652           return DAG.getNode(ISD::FMUL, DL, VT,
9653                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9654         }
9655       }
9656
9657       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9658         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9659         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9660         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9661             N1.getOperand(0) == N0) {
9662           return DAG.getNode(ISD::FMUL, DL, VT,
9663                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9664         }
9665       }
9666
9667       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9668       if (AllowNewConst &&
9669           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9670           N0.getOperand(0) == N0.getOperand(1) &&
9671           N1.getOperand(0) == N1.getOperand(1) &&
9672           N0.getOperand(0) == N1.getOperand(0)) {
9673         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9674                            DAG.getConstantFP(4.0, DL, VT), Flags);
9675       }
9676     }
9677   } // enable-unsafe-fp-math
9678
9679   // FADD -> FMA combines:
9680   if (SDValue Fused = visitFADDForFMACombine(N)) {
9681     AddToWorklist(Fused.getNode());
9682     return Fused;
9683   }
9684   return SDValue();
9685 }
9686
9687 SDValue DAGCombiner::visitFSUB(SDNode *N) {
9688   SDValue N0 = N->getOperand(0);
9689   SDValue N1 = N->getOperand(1);
9690   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9691   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9692   EVT VT = N->getValueType(0);
9693   SDLoc DL(N);
9694   const TargetOptions &Options = DAG.getTarget().Options;
9695   const SDNodeFlags Flags = N->getFlags();
9696
9697   // fold vector ops
9698   if (VT.isVector())
9699     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9700       return FoldedVOp;
9701
9702   // fold (fsub c1, c2) -> c1-c2
9703   if (N0CFP && N1CFP)
9704     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
9705
9706   if (SDValue NewSel = foldBinOpIntoSelect(N))
9707     return NewSel;
9708
9709   // fold (fsub A, (fneg B)) -> (fadd A, B)
9710   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9711     return DAG.getNode(ISD::FADD, DL, VT, N0,
9712                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9713
9714   // FIXME: Auto-upgrade the target/function-level option.
9715   if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
9716     // (fsub 0, B) -> -B
9717     if (N0CFP && N0CFP->isZero()) {
9718       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9719         return GetNegatedExpression(N1, DAG, LegalOperations);
9720       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9721         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
9722     }
9723   }
9724
9725   // If 'unsafe math' is enabled, fold lots of things.
9726   if (Options.UnsafeFPMath) {
9727     // (fsub A, 0) -> A
9728     if (N1CFP && N1CFP->isZero())
9729       return N0;
9730
9731     // (fsub x, x) -> 0.0
9732     if (N0 == N1)
9733       return DAG.getConstantFP(0.0f, DL, VT);
9734
9735     // (fsub x, (fadd x, y)) -> (fneg y)
9736     // (fsub x, (fadd y, x)) -> (fneg y)
9737     if (N1.getOpcode() == ISD::FADD) {
9738       SDValue N10 = N1->getOperand(0);
9739       SDValue N11 = N1->getOperand(1);
9740
9741       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
9742         return GetNegatedExpression(N11, DAG, LegalOperations);
9743
9744       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
9745         return GetNegatedExpression(N10, DAG, LegalOperations);
9746     }
9747   }
9748
9749   // FSUB -> FMA combines:
9750   if (SDValue Fused = visitFSUBForFMACombine(N)) {
9751     AddToWorklist(Fused.getNode());
9752     return Fused;
9753   }
9754
9755   return SDValue();
9756 }
9757
9758 SDValue DAGCombiner::visitFMUL(SDNode *N) {
9759   SDValue N0 = N->getOperand(0);
9760   SDValue N1 = N->getOperand(1);
9761   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9762   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9763   EVT VT = N->getValueType(0);
9764   SDLoc DL(N);
9765   const TargetOptions &Options = DAG.getTarget().Options;
9766   const SDNodeFlags Flags = N->getFlags();
9767
9768   // fold vector ops
9769   if (VT.isVector()) {
9770     // This just handles C1 * C2 for vectors. Other vector folds are below.
9771     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9772       return FoldedVOp;
9773   }
9774
9775   // fold (fmul c1, c2) -> c1*c2
9776   if (N0CFP && N1CFP)
9777     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
9778
9779   // canonicalize constant to RHS
9780   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9781      !isConstantFPBuildVectorOrConstantFP(N1))
9782     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
9783
9784   // fold (fmul A, 1.0) -> A
9785   if (N1CFP && N1CFP->isExactlyValue(1.0))
9786     return N0;
9787
9788   if (SDValue NewSel = foldBinOpIntoSelect(N))
9789     return NewSel;
9790
9791   if (Options.UnsafeFPMath) {
9792     // fold (fmul A, 0) -> 0
9793     if (N1CFP && N1CFP->isZero())
9794       return N1;
9795
9796     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
9797     if (N0.getOpcode() == ISD::FMUL) {
9798       // Fold scalars or any vector constants (not just splats).
9799       // This fold is done in general by InstCombine, but extra fmul insts
9800       // may have been generated during lowering.
9801       SDValue N00 = N0.getOperand(0);
9802       SDValue N01 = N0.getOperand(1);
9803       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
9804       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
9805       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
9806
9807       // Check 1: Make sure that the first operand of the inner multiply is NOT
9808       // a constant. Otherwise, we may induce infinite looping.
9809       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
9810         // Check 2: Make sure that the second operand of the inner multiply and
9811         // the second operand of the outer multiply are constants.
9812         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
9813             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
9814           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
9815           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
9816         }
9817       }
9818     }
9819
9820     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
9821     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
9822     // during an early run of DAGCombiner can prevent folding with fmuls
9823     // inserted during lowering.
9824     if (N0.getOpcode() == ISD::FADD &&
9825         (N0.getOperand(0) == N0.getOperand(1)) &&
9826         N0.hasOneUse()) {
9827       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
9828       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
9829       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
9830     }
9831   }
9832
9833   // fold (fmul X, 2.0) -> (fadd X, X)
9834   if (N1CFP && N1CFP->isExactlyValue(+2.0))
9835     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
9836
9837   // fold (fmul X, -1.0) -> (fneg X)
9838   if (N1CFP && N1CFP->isExactlyValue(-1.0))
9839     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9840       return DAG.getNode(ISD::FNEG, DL, VT, N0);
9841
9842   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
9843   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9844     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9845       // Both can be negated for free, check to see if at least one is cheaper
9846       // negated.
9847       if (LHSNeg == 2 || RHSNeg == 2)
9848         return DAG.getNode(ISD::FMUL, DL, VT,
9849                            GetNegatedExpression(N0, DAG, LegalOperations),
9850                            GetNegatedExpression(N1, DAG, LegalOperations),
9851                            Flags);
9852     }
9853   }
9854
9855   // FMUL -> FMA combines:
9856   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
9857     AddToWorklist(Fused.getNode());
9858     return Fused;
9859   }
9860
9861   return SDValue();
9862 }
9863
9864 SDValue DAGCombiner::visitFMA(SDNode *N) {
9865   SDValue N0 = N->getOperand(0);
9866   SDValue N1 = N->getOperand(1);
9867   SDValue N2 = N->getOperand(2);
9868   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9869   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9870   EVT VT = N->getValueType(0);
9871   SDLoc DL(N);
9872   const TargetOptions &Options = DAG.getTarget().Options;
9873
9874   // Constant fold FMA.
9875   if (isa<ConstantFPSDNode>(N0) &&
9876       isa<ConstantFPSDNode>(N1) &&
9877       isa<ConstantFPSDNode>(N2)) {
9878     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
9879   }
9880
9881   if (Options.UnsafeFPMath) {
9882     if (N0CFP && N0CFP->isZero())
9883       return N2;
9884     if (N1CFP && N1CFP->isZero())
9885       return N2;
9886   }
9887   // TODO: The FMA node should have flags that propagate to these nodes.
9888   if (N0CFP && N0CFP->isExactlyValue(1.0))
9889     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
9890   if (N1CFP && N1CFP->isExactlyValue(1.0))
9891     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
9892
9893   // Canonicalize (fma c, x, y) -> (fma x, c, y)
9894   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9895      !isConstantFPBuildVectorOrConstantFP(N1))
9896     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
9897
9898   // TODO: FMA nodes should have flags that propagate to the created nodes.
9899   // For now, create a Flags object for use with all unsafe math transforms.
9900   SDNodeFlags Flags;
9901   Flags.setUnsafeAlgebra(true);
9902
9903   if (Options.UnsafeFPMath) {
9904     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
9905     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
9906         isConstantFPBuildVectorOrConstantFP(N1) &&
9907         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
9908       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9909                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
9910                                      Flags), Flags);
9911     }
9912
9913     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
9914     if (N0.getOpcode() == ISD::FMUL &&
9915         isConstantFPBuildVectorOrConstantFP(N1) &&
9916         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
9917       return DAG.getNode(ISD::FMA, DL, VT,
9918                          N0.getOperand(0),
9919                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
9920                                      Flags),
9921                          N2);
9922     }
9923   }
9924
9925   // (fma x, 1, y) -> (fadd x, y)
9926   // (fma x, -1, y) -> (fadd (fneg x), y)
9927   if (N1CFP) {
9928     if (N1CFP->isExactlyValue(1.0))
9929       // TODO: The FMA node should have flags that propagate to this node.
9930       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
9931
9932     if (N1CFP->isExactlyValue(-1.0) &&
9933         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
9934       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
9935       AddToWorklist(RHSNeg.getNode());
9936       // TODO: The FMA node should have flags that propagate to this node.
9937       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
9938     }
9939   }
9940
9941   if (Options.UnsafeFPMath) {
9942     // (fma x, c, x) -> (fmul x, (c+1))
9943     if (N1CFP && N0 == N2) {
9944       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9945                          DAG.getNode(ISD::FADD, DL, VT, N1,
9946                                      DAG.getConstantFP(1.0, DL, VT), Flags),
9947                          Flags);
9948     }
9949
9950     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
9951     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
9952       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9953                          DAG.getNode(ISD::FADD, DL, VT, N1,
9954                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
9955                          Flags);
9956     }
9957   }
9958
9959   return SDValue();
9960 }
9961
9962 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9963 // reciprocal.
9964 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
9965 // Notice that this is not always beneficial. One reason is different targets
9966 // may have different costs for FDIV and FMUL, so sometimes the cost of two
9967 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
9968 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
9969 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
9970   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
9971   const SDNodeFlags Flags = N->getFlags();
9972   if (!UnsafeMath && !Flags.hasAllowReciprocal())
9973     return SDValue();
9974
9975   // Skip if current node is a reciprocal.
9976   SDValue N0 = N->getOperand(0);
9977   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9978   if (N0CFP && N0CFP->isExactlyValue(1.0))
9979     return SDValue();
9980
9981   // Exit early if the target does not want this transform or if there can't
9982   // possibly be enough uses of the divisor to make the transform worthwhile.
9983   SDValue N1 = N->getOperand(1);
9984   unsigned MinUses = TLI.combineRepeatedFPDivisors();
9985   if (!MinUses || N1->use_size() < MinUses)
9986     return SDValue();
9987
9988   // Find all FDIV users of the same divisor.
9989   // Use a set because duplicates may be present in the user list.
9990   SetVector<SDNode *> Users;
9991   for (auto *U : N1->uses()) {
9992     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
9993       // This division is eligible for optimization only if global unsafe math
9994       // is enabled or if this division allows reciprocal formation.
9995       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
9996         Users.insert(U);
9997     }
9998   }
9999
10000   // Now that we have the actual number of divisor uses, make sure it meets
10001   // the minimum threshold specified by the target.
10002   if (Users.size() < MinUses)
10003     return SDValue();
10004
10005   EVT VT = N->getValueType(0);
10006   SDLoc DL(N);
10007   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
10008   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
10009
10010   // Dividend / Divisor -> Dividend * Reciprocal
10011   for (auto *U : Users) {
10012     SDValue Dividend = U->getOperand(0);
10013     if (Dividend != FPOne) {
10014       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
10015                                     Reciprocal, Flags);
10016       CombineTo(U, NewNode);
10017     } else if (U != Reciprocal.getNode()) {
10018       // In the absence of fast-math-flags, this user node is always the
10019       // same node as Reciprocal, but with FMF they may be different nodes.
10020       CombineTo(U, Reciprocal);
10021     }
10022   }
10023   return SDValue(N, 0);  // N was replaced.
10024 }
10025
10026 SDValue DAGCombiner::visitFDIV(SDNode *N) {
10027   SDValue N0 = N->getOperand(0);
10028   SDValue N1 = N->getOperand(1);
10029   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10030   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10031   EVT VT = N->getValueType(0);
10032   SDLoc DL(N);
10033   const TargetOptions &Options = DAG.getTarget().Options;
10034   SDNodeFlags Flags = N->getFlags();
10035
10036   // fold vector ops
10037   if (VT.isVector())
10038     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10039       return FoldedVOp;
10040
10041   // fold (fdiv c1, c2) -> c1/c2
10042   if (N0CFP && N1CFP)
10043     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
10044
10045   if (SDValue NewSel = foldBinOpIntoSelect(N))
10046     return NewSel;
10047
10048   if (Options.UnsafeFPMath) {
10049     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
10050     if (N1CFP) {
10051       // Compute the reciprocal 1.0 / c2.
10052       const APFloat &N1APF = N1CFP->getValueAPF();
10053       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
10054       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
10055       // Only do the transform if the reciprocal is a legal fp immediate that
10056       // isn't too nasty (eg NaN, denormal, ...).
10057       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
10058           (!LegalOperations ||
10059            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
10060            // backend)... we should handle this gracefully after Legalize.
10061            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
10062            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
10063            TLI.isFPImmLegal(Recip, VT)))
10064         return DAG.getNode(ISD::FMUL, DL, VT, N0,
10065                            DAG.getConstantFP(Recip, DL, VT), Flags);
10066     }
10067
10068     // If this FDIV is part of a reciprocal square root, it may be folded
10069     // into a target-specific square root estimate instruction.
10070     if (N1.getOpcode() == ISD::FSQRT) {
10071       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
10072         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10073       }
10074     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
10075                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10076       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10077                                           Flags)) {
10078         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
10079         AddToWorklist(RV.getNode());
10080         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10081       }
10082     } else if (N1.getOpcode() == ISD::FP_ROUND &&
10083                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10084       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10085                                           Flags)) {
10086         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
10087         AddToWorklist(RV.getNode());
10088         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10089       }
10090     } else if (N1.getOpcode() == ISD::FMUL) {
10091       // Look through an FMUL. Even though this won't remove the FDIV directly,
10092       // it's still worthwhile to get rid of the FSQRT if possible.
10093       SDValue SqrtOp;
10094       SDValue OtherOp;
10095       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10096         SqrtOp = N1.getOperand(0);
10097         OtherOp = N1.getOperand(1);
10098       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
10099         SqrtOp = N1.getOperand(1);
10100         OtherOp = N1.getOperand(0);
10101       }
10102       if (SqrtOp.getNode()) {
10103         // We found a FSQRT, so try to make this fold:
10104         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
10105         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
10106           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
10107           AddToWorklist(RV.getNode());
10108           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10109         }
10110       }
10111     }
10112
10113     // Fold into a reciprocal estimate and multiply instead of a real divide.
10114     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
10115       AddToWorklist(RV.getNode());
10116       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10117     }
10118   }
10119
10120   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
10121   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10122     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10123       // Both can be negated for free, check to see if at least one is cheaper
10124       // negated.
10125       if (LHSNeg == 2 || RHSNeg == 2)
10126         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
10127                            GetNegatedExpression(N0, DAG, LegalOperations),
10128                            GetNegatedExpression(N1, DAG, LegalOperations),
10129                            Flags);
10130     }
10131   }
10132
10133   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
10134     return CombineRepeatedDivisors;
10135
10136   return SDValue();
10137 }
10138
10139 SDValue DAGCombiner::visitFREM(SDNode *N) {
10140   SDValue N0 = N->getOperand(0);
10141   SDValue N1 = N->getOperand(1);
10142   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10143   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10144   EVT VT = N->getValueType(0);
10145
10146   // fold (frem c1, c2) -> fmod(c1,c2)
10147   if (N0CFP && N1CFP)
10148     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10149
10150   if (SDValue NewSel = foldBinOpIntoSelect(N))
10151     return NewSel;
10152
10153   return SDValue();
10154 }
10155
10156 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10157   if (!DAG.getTarget().Options.UnsafeFPMath)
10158     return SDValue();
10159
10160   SDValue N0 = N->getOperand(0);
10161   if (TLI.isFsqrtCheap(N0, DAG))
10162     return SDValue();
10163
10164   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10165   // For now, create a Flags object for use with all unsafe math transforms.
10166   SDNodeFlags Flags;
10167   Flags.setUnsafeAlgebra(true);
10168   return buildSqrtEstimate(N0, Flags);
10169 }
10170
10171 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10172 /// copysign(x, fp_round(y)) -> copysign(x, y)
10173 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10174   SDValue N1 = N->getOperand(1);
10175   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10176        N1.getOpcode() == ISD::FP_ROUND)) {
10177     // Do not optimize out type conversion of f128 type yet.
10178     // For some targets like x86_64, configuration is changed to keep one f128
10179     // value in one SSE register, but instruction selection cannot handle
10180     // FCOPYSIGN on SSE registers yet.
10181     EVT N1VT = N1->getValueType(0);
10182     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10183     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10184   }
10185   return false;
10186 }
10187
10188 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10189   SDValue N0 = N->getOperand(0);
10190   SDValue N1 = N->getOperand(1);
10191   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10192   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10193   EVT VT = N->getValueType(0);
10194
10195   if (N0CFP && N1CFP) // Constant fold
10196     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10197
10198   if (N1CFP) {
10199     const APFloat &V = N1CFP->getValueAPF();
10200     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10201     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10202     if (!V.isNegative()) {
10203       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10204         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10205     } else {
10206       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10207         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10208                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10209     }
10210   }
10211
10212   // copysign(fabs(x), y) -> copysign(x, y)
10213   // copysign(fneg(x), y) -> copysign(x, y)
10214   // copysign(copysign(x,z), y) -> copysign(x, y)
10215   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10216       N0.getOpcode() == ISD::FCOPYSIGN)
10217     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10218
10219   // copysign(x, abs(y)) -> abs(x)
10220   if (N1.getOpcode() == ISD::FABS)
10221     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10222
10223   // copysign(x, copysign(y,z)) -> copysign(x, z)
10224   if (N1.getOpcode() == ISD::FCOPYSIGN)
10225     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10226
10227   // copysign(x, fp_extend(y)) -> copysign(x, y)
10228   // copysign(x, fp_round(y)) -> copysign(x, y)
10229   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10230     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10231
10232   return SDValue();
10233 }
10234
10235 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10236   SDValue N0 = N->getOperand(0);
10237   EVT VT = N->getValueType(0);
10238   EVT OpVT = N0.getValueType();
10239
10240   // fold (sint_to_fp c1) -> c1fp
10241   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10242       // ...but only if the target supports immediate floating-point values
10243       (!LegalOperations ||
10244        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10245     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10246
10247   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10248   // but UINT_TO_FP is legal on this target, try to convert.
10249   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10250       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10251     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10252     if (DAG.SignBitIsZero(N0))
10253       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10254   }
10255
10256   // The next optimizations are desirable only if SELECT_CC can be lowered.
10257   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10258     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10259     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10260         !VT.isVector() &&
10261         (!LegalOperations ||
10262          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10263       SDLoc DL(N);
10264       SDValue Ops[] =
10265         { N0.getOperand(0), N0.getOperand(1),
10266           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10267           N0.getOperand(2) };
10268       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10269     }
10270
10271     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10272     //      (select_cc x, y, 1.0, 0.0,, cc)
10273     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10274         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10275         (!LegalOperations ||
10276          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10277       SDLoc DL(N);
10278       SDValue Ops[] =
10279         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10280           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10281           N0.getOperand(0).getOperand(2) };
10282       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10283     }
10284   }
10285
10286   return SDValue();
10287 }
10288
10289 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10290   SDValue N0 = N->getOperand(0);
10291   EVT VT = N->getValueType(0);
10292   EVT OpVT = N0.getValueType();
10293
10294   // fold (uint_to_fp c1) -> c1fp
10295   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10296       // ...but only if the target supports immediate floating-point values
10297       (!LegalOperations ||
10298        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10299     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10300
10301   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10302   // but SINT_TO_FP is legal on this target, try to convert.
10303   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10304       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10305     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10306     if (DAG.SignBitIsZero(N0))
10307       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10308   }
10309
10310   // The next optimizations are desirable only if SELECT_CC can be lowered.
10311   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10312     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10313
10314     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10315         (!LegalOperations ||
10316          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10317       SDLoc DL(N);
10318       SDValue Ops[] =
10319         { N0.getOperand(0), N0.getOperand(1),
10320           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10321           N0.getOperand(2) };
10322       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10323     }
10324   }
10325
10326   return SDValue();
10327 }
10328
10329 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10330 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10331   SDValue N0 = N->getOperand(0);
10332   EVT VT = N->getValueType(0);
10333
10334   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10335     return SDValue();
10336
10337   SDValue Src = N0.getOperand(0);
10338   EVT SrcVT = Src.getValueType();
10339   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10340   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10341
10342   // We can safely assume the conversion won't overflow the output range,
10343   // because (for example) (uint8_t)18293.f is undefined behavior.
10344
10345   // Since we can assume the conversion won't overflow, our decision as to
10346   // whether the input will fit in the float should depend on the minimum
10347   // of the input range and output range.
10348
10349   // This means this is also safe for a signed input and unsigned output, since
10350   // a negative input would lead to undefined behavior.
10351   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10352   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10353   unsigned ActualSize = std::min(InputSize, OutputSize);
10354   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10355
10356   // We can only fold away the float conversion if the input range can be
10357   // represented exactly in the float range.
10358   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10359     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10360       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10361                                                        : ISD::ZERO_EXTEND;
10362       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10363     }
10364     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10365       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10366     return DAG.getBitcast(VT, Src);
10367   }
10368   return SDValue();
10369 }
10370
10371 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10372   SDValue N0 = N->getOperand(0);
10373   EVT VT = N->getValueType(0);
10374
10375   // fold (fp_to_sint c1fp) -> c1
10376   if (isConstantFPBuildVectorOrConstantFP(N0))
10377     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10378
10379   return FoldIntToFPToInt(N, DAG);
10380 }
10381
10382 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10383   SDValue N0 = N->getOperand(0);
10384   EVT VT = N->getValueType(0);
10385
10386   // fold (fp_to_uint c1fp) -> c1
10387   if (isConstantFPBuildVectorOrConstantFP(N0))
10388     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10389
10390   return FoldIntToFPToInt(N, DAG);
10391 }
10392
10393 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10394   SDValue N0 = N->getOperand(0);
10395   SDValue N1 = N->getOperand(1);
10396   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10397   EVT VT = N->getValueType(0);
10398
10399   // fold (fp_round c1fp) -> c1fp
10400   if (N0CFP)
10401     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10402
10403   // fold (fp_round (fp_extend x)) -> x
10404   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10405     return N0.getOperand(0);
10406
10407   // fold (fp_round (fp_round x)) -> (fp_round x)
10408   if (N0.getOpcode() == ISD::FP_ROUND) {
10409     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10410     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10411
10412     // Skip this folding if it results in an fp_round from f80 to f16.
10413     //
10414     // f80 to f16 always generates an expensive (and as yet, unimplemented)
10415     // libcall to __truncxfhf2 instead of selecting native f16 conversion
10416     // instructions from f32 or f64.  Moreover, the first (value-preserving)
10417     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10418     // x86.
10419     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10420       return SDValue();
10421
10422     // If the first fp_round isn't a value preserving truncation, it might
10423     // introduce a tie in the second fp_round, that wouldn't occur in the
10424     // single-step fp_round we want to fold to.
10425     // In other words, double rounding isn't the same as rounding.
10426     // Also, this is a value preserving truncation iff both fp_round's are.
10427     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10428       SDLoc DL(N);
10429       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10430                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10431     }
10432   }
10433
10434   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10435   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10436     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10437                               N0.getOperand(0), N1);
10438     AddToWorklist(Tmp.getNode());
10439     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10440                        Tmp, N0.getOperand(1));
10441   }
10442
10443   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10444     return NewVSel;
10445
10446   return SDValue();
10447 }
10448
10449 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10450   SDValue N0 = N->getOperand(0);
10451   EVT VT = N->getValueType(0);
10452   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10453   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10454
10455   // fold (fp_round_inreg c1fp) -> c1fp
10456   if (N0CFP && isTypeLegal(EVT)) {
10457     SDLoc DL(N);
10458     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10459     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10460   }
10461
10462   return SDValue();
10463 }
10464
10465 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10466   SDValue N0 = N->getOperand(0);
10467   EVT VT = N->getValueType(0);
10468
10469   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10470   if (N->hasOneUse() &&
10471       N->use_begin()->getOpcode() == ISD::FP_ROUND)
10472     return SDValue();
10473
10474   // fold (fp_extend c1fp) -> c1fp
10475   if (isConstantFPBuildVectorOrConstantFP(N0))
10476     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10477
10478   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10479   if (N0.getOpcode() == ISD::FP16_TO_FP &&
10480       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10481     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10482
10483   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10484   // value of X.
10485   if (N0.getOpcode() == ISD::FP_ROUND
10486       && N0.getConstantOperandVal(1) == 1) {
10487     SDValue In = N0.getOperand(0);
10488     if (In.getValueType() == VT) return In;
10489     if (VT.bitsLT(In.getValueType()))
10490       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10491                          In, N0.getOperand(1));
10492     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10493   }
10494
10495   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10496   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10497        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10498     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10499     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10500                                      LN0->getChain(),
10501                                      LN0->getBasePtr(), N0.getValueType(),
10502                                      LN0->getMemOperand());
10503     CombineTo(N, ExtLoad);
10504     CombineTo(N0.getNode(),
10505               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10506                           N0.getValueType(), ExtLoad,
10507                           DAG.getIntPtrConstant(1, SDLoc(N0))),
10508               ExtLoad.getValue(1));
10509     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10510   }
10511
10512   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10513     return NewVSel;
10514
10515   return SDValue();
10516 }
10517
10518 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10519   SDValue N0 = N->getOperand(0);
10520   EVT VT = N->getValueType(0);
10521
10522   // fold (fceil c1) -> fceil(c1)
10523   if (isConstantFPBuildVectorOrConstantFP(N0))
10524     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10525
10526   return SDValue();
10527 }
10528
10529 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10530   SDValue N0 = N->getOperand(0);
10531   EVT VT = N->getValueType(0);
10532
10533   // fold (ftrunc c1) -> ftrunc(c1)
10534   if (isConstantFPBuildVectorOrConstantFP(N0))
10535     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10536
10537   return SDValue();
10538 }
10539
10540 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10541   SDValue N0 = N->getOperand(0);
10542   EVT VT = N->getValueType(0);
10543
10544   // fold (ffloor c1) -> ffloor(c1)
10545   if (isConstantFPBuildVectorOrConstantFP(N0))
10546     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10547
10548   return SDValue();
10549 }
10550
10551 // FIXME: FNEG and FABS have a lot in common; refactor.
10552 SDValue DAGCombiner::visitFNEG(SDNode *N) {
10553   SDValue N0 = N->getOperand(0);
10554   EVT VT = N->getValueType(0);
10555
10556   // Constant fold FNEG.
10557   if (isConstantFPBuildVectorOrConstantFP(N0))
10558     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10559
10560   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10561                          &DAG.getTarget().Options))
10562     return GetNegatedExpression(N0, DAG, LegalOperations);
10563
10564   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10565   // constant pool values.
10566   if (!TLI.isFNegFree(VT) &&
10567       N0.getOpcode() == ISD::BITCAST &&
10568       N0.getNode()->hasOneUse()) {
10569     SDValue Int = N0.getOperand(0);
10570     EVT IntVT = Int.getValueType();
10571     if (IntVT.isInteger() && !IntVT.isVector()) {
10572       APInt SignMask;
10573       if (N0.getValueType().isVector()) {
10574         // For a vector, get a mask such as 0x80... per scalar element
10575         // and splat it.
10576         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
10577         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10578       } else {
10579         // For a scalar, just generate 0x80...
10580         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
10581       }
10582       SDLoc DL0(N0);
10583       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10584                         DAG.getConstant(SignMask, DL0, IntVT));
10585       AddToWorklist(Int.getNode());
10586       return DAG.getBitcast(VT, Int);
10587     }
10588   }
10589
10590   // (fneg (fmul c, x)) -> (fmul -c, x)
10591   if (N0.getOpcode() == ISD::FMUL &&
10592       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10593     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10594     if (CFP1) {
10595       APFloat CVal = CFP1->getValueAPF();
10596       CVal.changeSign();
10597       if (Level >= AfterLegalizeDAG &&
10598           (TLI.isFPImmLegal(CVal, VT) ||
10599            TLI.isOperationLegal(ISD::ConstantFP, VT)))
10600         return DAG.getNode(
10601             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10602             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
10603             N0->getFlags());
10604     }
10605   }
10606
10607   return SDValue();
10608 }
10609
10610 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10611   SDValue N0 = N->getOperand(0);
10612   SDValue N1 = N->getOperand(1);
10613   EVT VT = N->getValueType(0);
10614   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10615   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10616
10617   if (N0CFP && N1CFP) {
10618     const APFloat &C0 = N0CFP->getValueAPF();
10619     const APFloat &C1 = N1CFP->getValueAPF();
10620     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10621   }
10622
10623   // Canonicalize to constant on RHS.
10624   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10625      !isConstantFPBuildVectorOrConstantFP(N1))
10626     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10627
10628   return SDValue();
10629 }
10630
10631 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10632   SDValue N0 = N->getOperand(0);
10633   SDValue N1 = N->getOperand(1);
10634   EVT VT = N->getValueType(0);
10635   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10636   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10637
10638   if (N0CFP && N1CFP) {
10639     const APFloat &C0 = N0CFP->getValueAPF();
10640     const APFloat &C1 = N1CFP->getValueAPF();
10641     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10642   }
10643
10644   // Canonicalize to constant on RHS.
10645   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10646      !isConstantFPBuildVectorOrConstantFP(N1))
10647     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10648
10649   return SDValue();
10650 }
10651
10652 SDValue DAGCombiner::visitFABS(SDNode *N) {
10653   SDValue N0 = N->getOperand(0);
10654   EVT VT = N->getValueType(0);
10655
10656   // fold (fabs c1) -> fabs(c1)
10657   if (isConstantFPBuildVectorOrConstantFP(N0))
10658     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10659
10660   // fold (fabs (fabs x)) -> (fabs x)
10661   if (N0.getOpcode() == ISD::FABS)
10662     return N->getOperand(0);
10663
10664   // fold (fabs (fneg x)) -> (fabs x)
10665   // fold (fabs (fcopysign x, y)) -> (fabs x)
10666   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10667     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10668
10669   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10670   // constant pool values.
10671   if (!TLI.isFAbsFree(VT) &&
10672       N0.getOpcode() == ISD::BITCAST &&
10673       N0.getNode()->hasOneUse()) {
10674     SDValue Int = N0.getOperand(0);
10675     EVT IntVT = Int.getValueType();
10676     if (IntVT.isInteger() && !IntVT.isVector()) {
10677       APInt SignMask;
10678       if (N0.getValueType().isVector()) {
10679         // For a vector, get a mask such as 0x7f... per scalar element
10680         // and splat it.
10681         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10682         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10683       } else {
10684         // For a scalar, just generate 0x7f...
10685         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10686       }
10687       SDLoc DL(N0);
10688       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10689                         DAG.getConstant(SignMask, DL, IntVT));
10690       AddToWorklist(Int.getNode());
10691       return DAG.getBitcast(N->getValueType(0), Int);
10692     }
10693   }
10694
10695   return SDValue();
10696 }
10697
10698 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
10699   SDValue Chain = N->getOperand(0);
10700   SDValue N1 = N->getOperand(1);
10701   SDValue N2 = N->getOperand(2);
10702
10703   // If N is a constant we could fold this into a fallthrough or unconditional
10704   // branch. However that doesn't happen very often in normal code, because
10705   // Instcombine/SimplifyCFG should have handled the available opportunities.
10706   // If we did this folding here, it would be necessary to update the
10707   // MachineBasicBlock CFG, which is awkward.
10708
10709   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
10710   // on the target.
10711   if (N1.getOpcode() == ISD::SETCC &&
10712       TLI.isOperationLegalOrCustom(ISD::BR_CC,
10713                                    N1.getOperand(0).getValueType())) {
10714     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10715                        Chain, N1.getOperand(2),
10716                        N1.getOperand(0), N1.getOperand(1), N2);
10717   }
10718
10719   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
10720       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
10721        (N1.getOperand(0).hasOneUse() &&
10722         N1.getOperand(0).getOpcode() == ISD::SRL))) {
10723     SDNode *Trunc = nullptr;
10724     if (N1.getOpcode() == ISD::TRUNCATE) {
10725       // Look pass the truncate.
10726       Trunc = N1.getNode();
10727       N1 = N1.getOperand(0);
10728     }
10729
10730     // Match this pattern so that we can generate simpler code:
10731     //
10732     //   %a = ...
10733     //   %b = and i32 %a, 2
10734     //   %c = srl i32 %b, 1
10735     //   brcond i32 %c ...
10736     //
10737     // into
10738     //
10739     //   %a = ...
10740     //   %b = and i32 %a, 2
10741     //   %c = setcc eq %b, 0
10742     //   brcond %c ...
10743     //
10744     // This applies only when the AND constant value has one bit set and the
10745     // SRL constant is equal to the log2 of the AND constant. The back-end is
10746     // smart enough to convert the result into a TEST/JMP sequence.
10747     SDValue Op0 = N1.getOperand(0);
10748     SDValue Op1 = N1.getOperand(1);
10749
10750     if (Op0.getOpcode() == ISD::AND &&
10751         Op1.getOpcode() == ISD::Constant) {
10752       SDValue AndOp1 = Op0.getOperand(1);
10753
10754       if (AndOp1.getOpcode() == ISD::Constant) {
10755         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
10756
10757         if (AndConst.isPowerOf2() &&
10758             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
10759           SDLoc DL(N);
10760           SDValue SetCC =
10761             DAG.getSetCC(DL,
10762                          getSetCCResultType(Op0.getValueType()),
10763                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
10764                          ISD::SETNE);
10765
10766           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
10767                                           MVT::Other, Chain, SetCC, N2);
10768           // Don't add the new BRCond into the worklist or else SimplifySelectCC
10769           // will convert it back to (X & C1) >> C2.
10770           CombineTo(N, NewBRCond, false);
10771           // Truncate is dead.
10772           if (Trunc)
10773             deleteAndRecombine(Trunc);
10774           // Replace the uses of SRL with SETCC
10775           WorklistRemover DeadNodes(*this);
10776           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10777           deleteAndRecombine(N1.getNode());
10778           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10779         }
10780       }
10781     }
10782
10783     if (Trunc)
10784       // Restore N1 if the above transformation doesn't match.
10785       N1 = N->getOperand(1);
10786   }
10787
10788   // Transform br(xor(x, y)) -> br(x != y)
10789   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
10790   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
10791     SDNode *TheXor = N1.getNode();
10792     SDValue Op0 = TheXor->getOperand(0);
10793     SDValue Op1 = TheXor->getOperand(1);
10794     if (Op0.getOpcode() == Op1.getOpcode()) {
10795       // Avoid missing important xor optimizations.
10796       if (SDValue Tmp = visitXOR(TheXor)) {
10797         if (Tmp.getNode() != TheXor) {
10798           DEBUG(dbgs() << "\nReplacing.8 ";
10799                 TheXor->dump(&DAG);
10800                 dbgs() << "\nWith: ";
10801                 Tmp.getNode()->dump(&DAG);
10802                 dbgs() << '\n');
10803           WorklistRemover DeadNodes(*this);
10804           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
10805           deleteAndRecombine(TheXor);
10806           return DAG.getNode(ISD::BRCOND, SDLoc(N),
10807                              MVT::Other, Chain, Tmp, N2);
10808         }
10809
10810         // visitXOR has changed XOR's operands or replaced the XOR completely,
10811         // bail out.
10812         return SDValue(N, 0);
10813       }
10814     }
10815
10816     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
10817       bool Equal = false;
10818       if (isOneConstant(Op0) && Op0.hasOneUse() &&
10819           Op0.getOpcode() == ISD::XOR) {
10820         TheXor = Op0.getNode();
10821         Equal = true;
10822       }
10823
10824       EVT SetCCVT = N1.getValueType();
10825       if (LegalTypes)
10826         SetCCVT = getSetCCResultType(SetCCVT);
10827       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
10828                                    SetCCVT,
10829                                    Op0, Op1,
10830                                    Equal ? ISD::SETEQ : ISD::SETNE);
10831       // Replace the uses of XOR with SETCC
10832       WorklistRemover DeadNodes(*this);
10833       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10834       deleteAndRecombine(N1.getNode());
10835       return DAG.getNode(ISD::BRCOND, SDLoc(N),
10836                          MVT::Other, Chain, SetCC, N2);
10837     }
10838   }
10839
10840   return SDValue();
10841 }
10842
10843 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10844 //
10845 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10846   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10847   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10848
10849   // If N is a constant we could fold this into a fallthrough or unconditional
10850   // branch. However that doesn't happen very often in normal code, because
10851   // Instcombine/SimplifyCFG should have handled the available opportunities.
10852   // If we did this folding here, it would be necessary to update the
10853   // MachineBasicBlock CFG, which is awkward.
10854
10855   // Use SimplifySetCC to simplify SETCC's.
10856   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10857                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10858                                false);
10859   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10860
10861   // fold to a simpler setcc
10862   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
10863     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10864                        N->getOperand(0), Simp.getOperand(2),
10865                        Simp.getOperand(0), Simp.getOperand(1),
10866                        N->getOperand(4));
10867
10868   return SDValue();
10869 }
10870
10871 /// Return true if 'Use' is a load or a store that uses N as its base pointer
10872 /// and that N may be folded in the load / store addressing mode.
10873 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
10874                                     SelectionDAG &DAG,
10875                                     const TargetLowering &TLI) {
10876   EVT VT;
10877   unsigned AS;
10878
10879   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
10880     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
10881       return false;
10882     VT = LD->getMemoryVT();
10883     AS = LD->getAddressSpace();
10884   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
10885     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
10886       return false;
10887     VT = ST->getMemoryVT();
10888     AS = ST->getAddressSpace();
10889   } else
10890     return false;
10891
10892   TargetLowering::AddrMode AM;
10893   if (N->getOpcode() == ISD::ADD) {
10894     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10895     if (Offset)
10896       // [reg +/- imm]
10897       AM.BaseOffs = Offset->getSExtValue();
10898     else
10899       // [reg +/- reg]
10900       AM.Scale = 1;
10901   } else if (N->getOpcode() == ISD::SUB) {
10902     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10903     if (Offset)
10904       // [reg +/- imm]
10905       AM.BaseOffs = -Offset->getSExtValue();
10906     else
10907       // [reg +/- reg]
10908       AM.Scale = 1;
10909   } else
10910     return false;
10911
10912   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
10913                                    VT.getTypeForEVT(*DAG.getContext()), AS);
10914 }
10915
10916 /// Try turning a load/store into a pre-indexed load/store when the base
10917 /// pointer is an add or subtract and it has other uses besides the load/store.
10918 /// After the transformation, the new indexed load/store has effectively folded
10919 /// the add/subtract in and all of its other uses are redirected to the
10920 /// new load/store. Returns true only when \p N has been replaced this way.
10921 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
10922   if (Level < AfterLegalizeDAG) // Not attempted before the AfterLegalizeDAG level.
10923     return false;
10924
10925   bool isLoad = true;
10926   SDValue Ptr;
10927   EVT VT;
10928   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10929     if (LD->isIndexed()) // Already an indexed load; nothing to fold.
10930       return false;
10931     VT = LD->getMemoryVT();
10932     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
10933         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
10934       return false; // Neither pre-indexed load form is legal for this memory type.
10935     Ptr = LD->getBasePtr();
10936   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10937     if (ST->isIndexed()) // Already an indexed store; nothing to fold.
10938       return false;
10939     VT = ST->getMemoryVT();
10940     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
10941         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
10942       return false; // Neither pre-indexed store form is legal for this memory type.
10943     Ptr = ST->getBasePtr();
10944     isLoad = false;
10945   } else {
10946     return false; // N is neither a load nor a store.
10947   }
10948
10949   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
10950   // out.  There is no reason to make this a preinc/predec.
10951   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
10952       Ptr.getNode()->hasOneUse())
10953     return false;
10954
10955   // Ask the target to do addressing mode selection.
10956   SDValue BasePtr;
10957   SDValue Offset;
10958   ISD::MemIndexedMode AM = ISD::UNINDEXED; // Filled in by the target hook below.
10959   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
10960     return false;
10961
10962   // Backends without true r+i pre-indexed forms may need to pass a
10963   // constant base with a variable offset so that constant coercion
10964   // will work with the patterns in canonical form.
10965   bool Swapped = false; // True while BasePtr/Offset are exchanged relative to the target's answer.
10966   if (isa<ConstantSDNode>(BasePtr)) {
10967     std::swap(BasePtr, Offset);
10968     Swapped = true;
10969   }
10970
10971   // Don't create an indexed load / store with zero offset.
10972   if (isNullConstant(Offset))
10973     return false;
10974
10975   // Try turning it into a pre-indexed load / store except when:
10976   // 1) The new base ptr is a frame index.
10977   // 2) If N is a store and the new base ptr is either the same as or is a
10978   //    predecessor of the value being stored.
10979   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
10980   //    that would create a cycle.
10981   // 4) All uses are load / store ops that use it as old base ptr.
10982
10983   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
10984   // (plus the implicit offset) to a register to preinc anyway.
10985   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) // Register bases are rejected too.
10986     return false;
10987
10988   // Check #2.
10989   if (!isLoad) {
10990     SDValue Val = cast<StoreSDNode>(N)->getValue();
10991     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
10992       return false;
10993   }
10994
10995   // Caches for hasPredecessorHelper. They are shared by both loops below.
10996   SmallPtrSet<const SDNode *, 32> Visited;
10997   SmallVector<const SDNode *, 16> Worklist;
10998   Worklist.push_back(N); // The predecessor searches start from N.
10999
11000   // If the offset is a constant, there may be other adds of constants that
11001   // can be folded with this one. We should do this to avoid having to keep
11002   // a copy of the original base pointer.
11003   SmallVector<SDNode *, 16> OtherUses; // add/sub-with-constant users of BasePtr to rewrite later.
11004   if (isa<ConstantSDNode>(Offset))
11005     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
11006                               UE = BasePtr.getNode()->use_end();
11007          UI != UE; ++UI) {
11008       SDUse &Use = UI.getUse();
11009       // Skip the use that is Ptr and uses of other results from BasePtr's
11010       // node (important for nodes that return multiple results).
11011       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
11012         continue;
11013
11014       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) // Presumably: user is a predecessor of N; leave it alone.
11015         continue;
11016
11017       if (Use.getUser()->getOpcode() != ISD::ADD &&
11018           Use.getUser()->getOpcode() != ISD::SUB) {
11019         OtherUses.clear(); // Any user that is not an add/sub abandons the whole rewrite list.
11020         break;
11021       }
11022
11023       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); // The operand that is not BasePtr (index 1 or 0).
11024       if (!isa<ConstantSDNode>(Op1)) {
11025         OtherUses.clear();
11026         break;
11027       }
11028
11029       // FIXME: In some cases, we can be smarter about this.
11030       if (Op1.getValueType() != Offset.getValueType()) {
11031         OtherUses.clear();
11032         break;
11033       }
11034
11035       OtherUses.push_back(Use.getUser());
11036     }
11037
11038   if (Swapped) // Restore the target's BasePtr/Offset order before building the node.
11039     std::swap(BasePtr, Offset);
11040
11041   // Now check for #3 and #4.
11042   bool RealUse = false; // Set once some other user of Ptr cannot fold it into an addressing mode.
11043
11044   for (SDNode *Use : Ptr.getNode()->uses()) {
11045     if (Use == N)
11046       continue;
11047     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) // Check #3.
11048       return false;
11049
11050     // If Ptr may be folded in addressing mode of other use, then it's
11051     // not profitable to do this transformation.
11052     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
11053       RealUse = true;
11054   }
11055
11056   if (!RealUse) // Check #4.
11057     return false;
11058
11059   SDValue Result;
11060   if (isLoad)
11061     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11062                                 BasePtr, Offset, AM);
11063   else
11064     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11065                                  BasePtr, Offset, AM);
11066   ++PreIndexedNodes;
11067   ++NodesCombined;
11068   DEBUG(dbgs() << "\nReplacing.4 ";
11069         N->dump(&DAG);
11070         dbgs() << "\nWith: ";
11071         Result.getNode()->dump(&DAG);
11072         dbgs() << '\n');
11073   WorklistRemover DeadNodes(*this);
11074   if (isLoad) {
11075     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); // Old result 0 -> new result 0.
11076     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); // Old result 1 -> new result 2 (result 1 is the updated base).
11077   } else {
11078     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); // Old result 0 -> new result 1 (result 0 is the updated base).
11079   }
11080
11081   // Finally, since the node is now dead, remove it from the graph.
11082   deleteAndRecombine(N);
11083
11084   if (Swapped) // Swap again so that Offset is the constant operand used below.
11085     std::swap(BasePtr, Offset);
11086
11087   // Replace other uses of BasePtr that can be updated to use Ptr
11088   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
11089     unsigned OffsetIdx = 1; // Operand index of the constant in OtherUses[i].
11090     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
11091       OffsetIdx = 0;
11092     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
11093            BasePtr.getNode() && "Expected BasePtr operand");
11094
11095     // We need to replace ptr0 in the following expression:
11096     //   x0 * offset0 + y0 * ptr0 = t0
11097     // knowing that
11098     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
11099     //
11100     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
11101     // indexed load/store and the expression that needs to be re-written.
11102     //
11103     // Therefore, we have:
11104     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
11105
11106     ConstantSDNode *CN =
11107       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
11108     int X0, X1, Y0, Y1;
11109     const APInt &Offset0 = CN->getAPIntValue();
11110     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
11111
11112     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; // -1 only for (ptr0 - offset0).
11113     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; // -1 only for (offset0 - ptr0).
11114     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; // -1 when the indexed op subtracts the constant.
11115     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; // -1 when the indexed op subtracts ptr0.
11116
11117     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; // Sign of the t1 term in the formula above.
11118
11119     APInt CNV = Offset0; // Becomes (x0 * offset0 - x1 * y0 * y1 * offset1).
11120     if (X0 < 0) CNV = -CNV;
11121     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
11122     else CNV = CNV - Offset1;
11123
11124     SDLoc DL(OtherUses[i]);
11125
11126     // We can now generate the new expression.
11127     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
11128     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); // t1: the updated base value of the indexed node.
11129
11130     SDValue NewUse = DAG.getNode(Opcode,
11131                                  DL,
11132                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
11133     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
11134     deleteAndRecombine(OtherUses[i]);
11135   }
11136
11137   // Replace the uses of Ptr with uses of the updated base value.
11138   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
11139   deleteAndRecombine(Ptr.getNode());
11140
11141   return true;
11142 }
11143
11144 /// Try to combine a load/store with an add/sub of the base pointer node into a
11145 /// post-indexed load/store. The transformation folds the add/subtract into the
11146 /// new indexed load/store effectively and all of its uses are redirected to the
11147 /// new load/store. Returns true only when \p N has been replaced this way.
11148 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
11149   if (Level < AfterLegalizeDAG) // Not attempted before the AfterLegalizeDAG level.
11150     return false;
11151
11152   bool isLoad = true;
11153   SDValue Ptr;
11154   EVT VT;
11155   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
11156     if (LD->isIndexed()) // Already an indexed load; nothing to fold.
11157       return false;
11158     VT = LD->getMemoryVT();
11159     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
11160         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
11161       return false; // Neither post-indexed load form is legal for this memory type.
11162     Ptr = LD->getBasePtr();
11163   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
11164     if (ST->isIndexed()) // Already an indexed store; nothing to fold.
11165       return false;
11166     VT = ST->getMemoryVT();
11167     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
11168         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
11169       return false; // Neither post-indexed store form is legal for this memory type.
11170     Ptr = ST->getBasePtr();
11171     isLoad = false;
11172   } else {
11173     return false; // N is neither a load nor a store.
11174   }
11175
11176   if (Ptr.getNode()->hasOneUse()) // Ptr has a single use, so there is no add/sub user to fold.
11177     return false;
11178
11179   for (SDNode *Op : Ptr.getNode()->uses()) { // Op: candidate add/sub user of Ptr.
11180     if (Op == N ||
11181         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
11182       continue;
11183
11184     SDValue BasePtr;
11185     SDValue Offset;
11186     ISD::MemIndexedMode AM = ISD::UNINDEXED; // Filled in by the target hook below.
11187     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
11188       // Don't create an indexed load / store with zero offset.
11189       if (isNullConstant(Offset))
11190         continue;
11191
11192       // Try turning it into a post-indexed load / store except when
11193       // 1) All uses are load / store ops that use it as base ptr (and
11194       //    it may be folded as addressing mode).
11195       // 2) Op must be independent of N, i.e. Op is neither a predecessor
11196       //    nor a successor of N. Otherwise, if Op is folded that would
11197       //    create a cycle.
11198
11199       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) // Frame-index and register bases are not transformed.
11200         continue;
11201
11202       // Check for #1.
11203       bool TryNext = false; // Set when this candidate Op should be abandoned.
11204       for (SDNode *Use : BasePtr.getNode()->uses()) {
11205         if (Use == Ptr.getNode())
11206           continue;
11207
11208         // If all the uses are load / store addresses, then don't do the
11209         // transformation.
11210         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
11211           bool RealUse = false; // True if some user of Use cannot fold it into an addressing mode.
11212           for (SDNode *UseUse : Use->uses()) {
11213             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
11214               RealUse = true;
11215           }
11216
11217           if (!RealUse) {
11218             TryNext = true;
11219             break;
11220           }
11221         }
11222       }
11223
11224       if (TryNext)
11225         continue;
11226
11227       // Check for #2
11228       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
11229         SDValue Result = isLoad
11230           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11231                                BasePtr, Offset, AM)
11232           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11233                                 BasePtr, Offset, AM);
11234         ++PostIndexedNodes;
11235         ++NodesCombined;
11236         DEBUG(dbgs() << "\nReplacing.5 ";
11237               N->dump(&DAG);
11238               dbgs() << "\nWith: ";
11239               Result.getNode()->dump(&DAG);
11240               dbgs() << '\n');
11241         WorklistRemover DeadNodes(*this);
11242         if (isLoad) {
11243           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); // Old result 0 -> new result 0.
11244           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); // Old result 1 -> new result 2 (result 1 is the updated base).
11245         } else {
11246           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); // Old result 0 -> new result 1 (result 0 is the updated base).
11247         }
11248
11249         // Finally, since the node is now dead, remove it from the graph.
11250         deleteAndRecombine(N);
11251
11252         // Replace the uses of Op with uses of the updated base value.
11253         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
11254                                       Result.getValue(isLoad ? 1 : 0));
11255         deleteAndRecombine(Op);
11256         return true;
11257       }
11258     }
11259   }
11260
11261   return false; // No add/sub user of Ptr could be folded.
11262 }
11263
11264 /// \brief Return the base-pointer arithmetic from an indexed \p LD: a new ADD (PRE_INC/POST_INC) or SUB (otherwise) of its operands 1 and 2.
11265 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11266   ISD::MemIndexedMode AM = LD->getAddressingMode();
11267   assert(AM != ISD::UNINDEXED);
11268   SDValue BP = LD->getOperand(1); // Left-hand operand of the add/sub built below.
11269   SDValue Inc = LD->getOperand(2); // Right-hand operand of the add/sub built below.
11270
11271   // Some backends use TargetConstants for load offsets, but don't expect
11272   // TargetConstants in general ADD nodes. We can convert these constants into
11273   // regular Constants (if the constant is not opaque).
11274   assert((Inc.getOpcode() != ISD::TargetConstant ||
11275           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11276          "Cannot split out indexing using opaque target constants");
11277   if (Inc.getOpcode() == ISD::TargetConstant) {
11278     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11279     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11280                           ConstInc->getValueType(0)); // Same value and type, as a regular Constant.
11281   }
11282
11283   unsigned Opc =
11284       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); // Increment modes add; all other modes subtract.
11285   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11286 }
11287
11288 SDValue DAGCombiner::visitLOAD(SDNode *N) { // Combines on a load node; returns an empty SDValue() when nothing was changed.
11289   LoadSDNode *LD  = cast<LoadSDNode>(N);
11290   SDValue Chain = LD->getChain();
11291   SDValue Ptr   = LD->getBasePtr();
11292
11293   // If load is not volatile and there are no uses of the loaded value (and
11294   // the updated indexed value in case of indexed loads), change uses of the
11295   // chain value into uses of the chain input (i.e. delete the dead load).
11296   if (!LD->isVolatile()) {
11297     if (N->getValueType(1) == MVT::Other) {
11298       // Unindexed loads: result 1 is the chain.
11299       if (!N->hasAnyUseOfValue(0)) {
11300         // It's not safe to use the two value CombineTo variant here. e.g.
11301         // v1, chain2 = load chain1, loc
11302         // v2, chain3 = load chain2, loc
11303         // v3         = add v2, c
11304         // Now we replace use of chain2 with chain1.  This makes the second load
11305         // isomorphic to the one we are deleting, and thus makes this load live.
11306         DEBUG(dbgs() << "\nReplacing.6 ";
11307               N->dump(&DAG);
11308               dbgs() << "\nWith chain: ";
11309               Chain.getNode()->dump(&DAG);
11310               dbgs() << "\n");
11311         WorklistRemover DeadNodes(*this);
11312         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11313         AddUsersToWorklist(Chain.getNode());
11314         if (N->use_empty()) // See the note above: the replacement may have made N live again.
11315           deleteAndRecombine(N);
11316
11317         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11318       }
11319     } else {
11320       // Indexed loads: result 1 is the updated index value, result 2 the chain.
11321       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11322
11323       // If this load has an opaque TargetConstant offset, then we cannot split
11324       // the indexing into an add/sub directly (that TargetConstant may not be
11325       // valid for a different type of node, and we cannot convert an opaque
11326       // target constant into a regular constant).
11327       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11328                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11329
11330       if (!N->hasAnyUseOfValue(0) &&
11331           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { // Loaded value unused, and the index is either splittable or unused too.
11332         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11333         SDValue Index;
11334         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11335           Index = SplitIndexingFromLoad(LD);
11336           // Try to fold the base pointer arithmetic into subsequent loads and
11337           // stores.
11338           AddUsersToWorklist(N);
11339         } else
11340           Index = DAG.getUNDEF(N->getValueType(1)); // Index result is unused here, so undef suffices.
11341         DEBUG(dbgs() << "\nReplacing.7 ";
11342               N->dump(&DAG);
11343               dbgs() << "\nWith: ";
11344               Undef.getNode()->dump(&DAG);
11345               dbgs() << " and 2 other values\n");
11346         WorklistRemover DeadNodes(*this);
11347         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11348         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11349         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11350         deleteAndRecombine(N);
11351         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11352       }
11353     }
11354   }
11355
11356   // If this load is directly stored, replace the load value with the stored
11357   // value.
11358   // TODO: Handle store large -> read small portion.
11359   // TODO: Handle TRUNCSTORE/LOADEXT
11360   if (OptLevel != CodeGenOpt::None &&
11361       ISD::isNormalLoad(N) && !LD->isVolatile()) {
11362     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11363       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11364       if (PrevST->getBasePtr() == Ptr &&
11365           PrevST->getValue().getValueType() == N->getValueType(0))
11366         return CombineTo(N, PrevST->getOperand(1), Chain); // NOTE(review): operand 1 is presumably the stored value (cf. getValue() above) - confirm.
11367     }
11368   }
11369
11370   // Try to infer better alignment information than the load already has.
11371   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
11372     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11373       if (Align > LD->getMemOperand()->getBaseAlignment()) {
11374         SDValue NewLoad = DAG.getExtLoad(
11375             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11376             LD->getPointerInfo(), LD->getMemoryVT(), Align,
11377             LD->getMemOperand()->getFlags(), LD->getAAInfo());
11378         if (NewLoad.getNode() != N) // Only replace if a different node was produced.
11379           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11380       }
11381     }
11382   }
11383
11384   if (LD->isUnindexed()) {
11385     // Walk up chain skipping non-aliasing memory nodes.
11386     SDValue BetterChain = FindBetterChain(N, Chain);
11387
11388     // If there is a better chain.
11389     if (Chain != BetterChain) {
11390       SDValue ReplLoad;
11391
11392       // Replace the chain to avoid the dependency.
11393       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11394         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11395                                BetterChain, Ptr, LD->getMemOperand());
11396       } else {
11397         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11398                                   LD->getValueType(0),
11399                                   BetterChain, Ptr, LD->getMemoryVT(),
11400                                   LD->getMemOperand());
11401       }
11402
11403       // Create token factor to keep old chain connected.
11404       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11405                                   MVT::Other, Chain, ReplLoad.getValue(1));
11406
11407       // Make sure the new and old chains are cleaned up.
11408       AddToWorklist(Token.getNode());
11409
11410       // Replace uses with load result and token factor. Don't add users
11411       // to work list.
11412       return CombineTo(N, ReplLoad.getValue(0), Token, false);
11413     }
11414   }
11415
11416   // Try transforming N to an indexed load (pre-indexed is tried first).
11417   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11418     return SDValue(N, 0);
11419
11420   // Try to slice up N to more direct loads if the slices are mapped to
11421   // different register banks or pairing can take place.
11422   if (SliceUpLoad(N))
11423     return SDValue(N, 0);
11424
11425   return SDValue(); // No combine applied.
11426 }
11427
11428 namespace {
11429 /// \brief Helper structure used to slice a load into smaller loads.
11430 /// Basically a slice is obtained from the following sequence:
11431 /// Origin = load Ty1, Base
11432 /// Shift = srl Ty1 Origin, CstTy Amount
11433 /// Inst = trunc Shift to Ty2
11434 ///
11435 /// Then, it will be rewritten into:
11436 /// Slice = load SliceTy, Base + SliceOffset
11437 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11438 ///
11439 /// SliceTy is deduced from the number of bits that are actually used to
11440 /// build Inst.
11441 struct LoadedSlice {
11442   /// \brief Helper structure used to compute the cost of a slice.
11443   struct Cost {
11444     /// Are we optimizing for code size.
11445     bool ForCodeSize;
11446     /// Counters for each kind of operation that contributes to the cost.
11447     unsigned Loads;
11448     unsigned Truncates;
11449     unsigned CrossRegisterBanksCopies;
11450     unsigned ZExts;
11451     unsigned Shift;
11452
11453     Cost(bool ForCodeSize = false) // All counters start at zero.
11454         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
11455           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
11456
11457     /// \brief Get the cost of one isolated slice.
11458     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11459         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
11460           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
11461       EVT TruncType = LS.Inst->getValueType(0);
11462       EVT LoadedType = LS.getLoadedType();
11463       if (TruncType != LoadedType &&
11464           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11465         ZExts = 1; // A zero-extend is needed and the target does not report it as free.
11466     }
11467
11468     /// \brief Account for slicing gain in the current cost.
11469     /// Slicing provides a few gains like removing a shift or a
11470     /// truncate. This method allows to grow the cost of the original
11471     /// load with the gain from this slice.
11472     void addSliceGain(const LoadedSlice &LS) {
11473       // Each slice saves a truncate.
11474       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11475       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11476                               LS.Inst->getValueType(0)))
11477         ++Truncates;
11478       // If there is a shift amount, this slice gets rid of it.
11479       if (LS.Shift)
11480         ++Shift;
11481       // If this slice can merge a cross register bank copy, account for it.
11482       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11483         ++CrossRegisterBanksCopies;
11484     }
11485
11486     Cost &operator+=(const Cost &RHS) { // Adds every counter; ForCodeSize is left untouched.
11487       Loads += RHS.Loads;
11488       Truncates += RHS.Truncates;
11489       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11490       ZExts += RHS.ZExts;
11491       Shift += RHS.Shift;
11492       return *this;
11493     }
11494
11495     bool operator==(const Cost &RHS) const { // Compares the counters only, not ForCodeSize.
11496       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11497              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11498              ZExts == RHS.ZExts && Shift == RHS.Shift;
11499     }
11500
11501     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11502
11503     bool operator<(const Cost &RHS) const {
11504       // Assume cross register banks copies are as expensive as loads.
11505       // FIXME: Do we want some more target hooks?
11506       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11507       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11508       // Unless we are optimizing for code size, consider the
11509       // expensive operation first.
11510       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11511         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11512       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11513              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); // Otherwise compare the total operation counts.
11514     }
11515
11516     bool operator>(const Cost &RHS) const { return RHS < *this; }
11517
11518     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11519
11520     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11521   };
11522   // The last instruction that represents the slice. This should be a
11523   // truncate instruction.
11524   SDNode *Inst;
11525   // The original load instruction.
11526   LoadSDNode *Origin;
11527   // The right shift amount in bits from the original load.
11528   unsigned Shift;
11529   // The DAG that Origin came from.
11530   // This is used to get some contextual information about legal types, etc.
11531   SelectionDAG *DAG;
11532
11533   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11534               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11535       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11536
11537   /// \brief Get the bits of the value loaded by Origin that this slice uses.
11538   /// \return An APInt as wide as Origin's value 0, with used bits set to 1 and
11539   ///         not used bits set to 0.
11540   APInt getUsedBits() const {
11541     // Reproduce the trunc(lshr) sequence:
11542     // - Start from the truncated value.
11543     // - Zero extend to the desired bit width.
11544     // - Shift left.
11545     assert(Origin && "No original load to compare against.");
11546     unsigned BitWidth = Origin->getValueSizeInBits(0);
11547     assert(Inst && "This slice is not bound to an instruction");
11548     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11549            "Extracted slice is bigger than the whole type!");
11550     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11551     UsedBits.setAllBits();
11552     UsedBits = UsedBits.zext(BitWidth);
11553     UsedBits <<= Shift;
11554     return UsedBits;
11555   }
11556
11557   /// \brief Get the size of the slice to be loaded in bytes.
11558   unsigned getLoadedSize() const {
11559     unsigned SliceSize = getUsedBits().countPopulation(); // Number of used bits.
11560     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11561     return SliceSize / 8;
11562   }
11563
11564   /// \brief Get the type that will be loaded for this slice.
11565   /// Note: This may not be the final type for the slice.
11566   EVT getLoadedType() const {
11567     assert(DAG && "Missing context");
11568     LLVMContext &Ctxt = *DAG->getContext();
11569     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); // Integer type exactly getLoadedSize() bytes wide.
11570   }
11571
11572   /// \brief Get the alignment of the load used for this slice.
11573   unsigned getAlignment() const {
11574     unsigned Alignment = Origin->getAlignment();
11575     unsigned Offset = getOffsetFromBase();
11576     if (Offset != 0)
11577       Alignment = MinAlign(Alignment, Alignment + Offset); // Presumably the alignment still guaranteed at Base + Offset.
11578     return Alignment;
11579   }
11580
11581   /// \brief Check if this slice can be rewritten with legal operations.
11582   bool isLegal() const {
11583     // An invalid slice is not legal.
11584     if (!Origin || !Inst || !DAG)
11585       return false;
11586
11587     // Offsets are for indexed load only, we do not handle that.
11588     if (!Origin->getOffset().isUndef())
11589       return false;
11590
11591     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11592
11593     // Check that the type is legal.
11594     EVT SliceType = getLoadedType();
11595     if (!TLI.isTypeLegal(SliceType))
11596       return false;
11597
11598     // Check that the load is legal for this type.
11599     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11600       return false;
11601
11602     // Check that the offset can be computed.
11603     // 1. Check its type.
11604     EVT PtrType = Origin->getBasePtr().getValueType();
11605     if (PtrType == MVT::Untyped || PtrType.isExtended())
11606       return false;
11607
11608     // 2. Check that it fits in the immediate.
11609     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11610       return false;
11611
11612     // 3. Check that the computation is legal.
11613     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11614       return false;
11615
11616     // Check that the zext is legal if it needs one.
11617     EVT TruncateType = Inst->getValueType(0);
11618     if (TruncateType != SliceType &&
11619         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11620       return false;
11621
11622     return true;
11623   }
11624
11625   /// \brief Get the offset in bytes of this slice in the original chunk of
11626   /// bits.
11627   /// \pre DAG != nullptr.
11628   uint64_t getOffsetFromBase() const {
11629     assert(DAG && "Missing context.");
11630     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11631     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11632     uint64_t Offset = Shift / 8; // Byte offset counted from the least significant byte.
11633     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11634     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11635            "The size of the original loaded type is not a multiple of a"
11636            " byte.");
11637     // If Offset is bigger than TySizeInBytes, it means we are loading all
11638     // zeros. This should have been optimized before in the process.
11639     assert(TySizeInBytes > Offset &&
11640            "Invalid shift amount for given loaded size");
11641     if (IsBigEndian)
11642       Offset = TySizeInBytes - Offset - getLoadedSize(); // On big-endian layouts, measure from the other end of the value.
11643     return Offset;
11644   }
11645
11646   /// \brief Generate the sequence of instructions to load the slice
11647   /// represented by this object and redirect the uses of this slice to
11648   /// this new sequence of instructions.
11649   /// \pre this->Inst && this->Origin are valid Instructions and this
11650   /// object passed the legal check: LoadedSlice::isLegal returned true.
11651   /// \return The last instruction of the sequence used to load the slice.
11652   SDValue loadSlice() const {
11653     assert(Inst && Origin && "Unable to replace a non-existing slice.");
11654     const SDValue &OldBaseAddr = Origin->getBasePtr();
11655     SDValue BaseAddr = OldBaseAddr;
11656     // Get the offset in that chunk of bytes w.r.t. the endianness.
11657     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11658     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11659     if (Offset) {
11660       // BaseAddr = BaseAddr + Offset.
11661       EVT ArithType = BaseAddr.getValueType();
11662       SDLoc DL(Origin);
11663       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11664                               DAG->getConstant(Offset, DL, ArithType));
11665     }
11666
11667     // Create the type of the loaded slice according to its size.
11668     EVT SliceType = getLoadedType();
11669
11670     // Create the load for the slice, on the same chain as the original load.
11671     SDValue LastInst =
11672         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11673                      Origin->getPointerInfo().getWithOffset(Offset),
11674                      getAlignment(), Origin->getMemOperand()->getFlags());
11675     // If the final type is not the same as the loaded type, this means that
11676     // we have to pad with zero. Create a zero extend for that.
11677     EVT FinalType = Inst->getValueType(0);
11678     if (SliceType != FinalType)
11679       LastInst =
11680           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11681     return LastInst;
11682   }
11683
11684   /// \brief Check if this slice can be merged with an expensive cross register
11685   /// bank copy. E.g.,
11686   /// i = load i32
11687   /// f = bitcast i32 i to float
11688   bool canMergeExpensiveCrossRegisterBankCopy() const {
11689     if (!Inst || !Inst->hasOneUse())
11690       return false;
11691     SDNode *Use = *Inst->use_begin(); // The single user of the slice.
11692     if (Use->getOpcode() != ISD::BITCAST)
11693       return false;
11694     assert(DAG && "Missing context");
11695     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11696     EVT ResVT = Use->getValueType(0);
11697     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11698     const TargetRegisterClass *ArgRC =
11699         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11700     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) // Same register class, or the load is not legal for the bitcast's result type.
11701       return false;
11702
11703     // At this point, we know that we perform a cross-register-bank copy.
11704     // Check if it is expensive.
11705     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11706     // Assume bitcasts are cheap, unless both register classes do not
11707     // explicitly share a common sub class.
11708     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11709       return false;
11710
11711     // Check if it will be merged with the load.
11712     // 1. Check the alignment constraint.
11713     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11714         ResVT.getTypeForEVT(*DAG->getContext()));
11715
11716     if (RequiredAlignment > getAlignment())
11717       return false;
11718
11719     // 2. Check that the load is a legal operation for that type.
11720     if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) // NOTE(review): repeats the legality check already made earlier in this function.
11721       return false;
11722
11723     // 3. Check that we do not have a zext in the way.
11724     if (Inst->getValueType(0) != getLoadedType())
11725       return false;
11726
11727     return true;
11728   }
11729 };
11730 } // end anonymous namespace
11731
11732 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11733 /// \p UsedBits looks like 0..0 1..1 0..0.
11734 static bool areUsedBitsDense(const APInt &UsedBits) {
11735   // If all the bits are one, this is dense!
11736   if (UsedBits.isAllOnesValue())
11737     return true;
11738
11739   // Get rid of the unused bits on the right.
11740   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11741   // Get rid of the unused bits on the left.
11742   if (NarrowedUsedBits.countLeadingZeros())
11743     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11744   // Check that the chunk of bits is completely used.
11745   return NarrowedUsedBits.isAllOnesValue();
11746 }
11747
11748 /// \brief Check whether or not \p First and \p Second are next to each other
11749 /// in memory. This means that there is no hole between the bits loaded
11750 /// by \p First and the bits loaded by \p Second.
11751 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11752                                      const LoadedSlice &Second) {
11753   assert(First.Origin == Second.Origin && First.Origin &&
11754          "Unable to match different memory origins.");
11755   APInt UsedBits = First.getUsedBits();
11756   assert((UsedBits & Second.getUsedBits()) == 0 &&
11757          "Slices are not supposed to overlap.");
11758   UsedBits |= Second.getUsedBits();
11759   return areUsedBitsDense(UsedBits);
11760 }
11761
11762 /// \brief Adjust the \p GlobalLSCost according to the target
11763 /// paring capabilities and the layout of the slices.
11764 /// \pre \p GlobalLSCost should account for at least as many loads as
11765 /// there is in the slices in \p LoadedSlices.
11766 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11767                                  LoadedSlice::Cost &GlobalLSCost) {
11768   unsigned NumberOfSlices = LoadedSlices.size();
11769   // If there is less than 2 elements, no pairing is possible.
11770   if (NumberOfSlices < 2)
11771     return;
11772
11773   // Sort the slices so that elements that are likely to be next to each
11774   // other in memory are next to each other in the list.
11775   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
11776             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
11777     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
11778     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
11779   });
11780   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
11781   // First (resp. Second) is the first (resp. Second) potentially candidate
11782   // to be placed in a paired load.
11783   const LoadedSlice *First = nullptr;
11784   const LoadedSlice *Second = nullptr;
11785   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
11786                 // Set the beginning of the pair.
11787                                                            First = Second) {
11788
11789     Second = &LoadedSlices[CurrSlice];
11790
11791     // If First is NULL, it means we start a new pair.
11792     // Get to the next slice.
11793     if (!First)
11794       continue;
11795
11796     EVT LoadedType = First->getLoadedType();
11797
11798     // If the types of the slices are different, we cannot pair them.
11799     if (LoadedType != Second->getLoadedType())
11800       continue;
11801
11802     // Check if the target supplies paired loads for this type.
11803     unsigned RequiredAlignment = 0;
11804     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
11805       // move to the next pair, this type is hopeless.
11806       Second = nullptr;
11807       continue;
11808     }
11809     // Check if we meet the alignment requirement.
11810     if (RequiredAlignment > First->getAlignment())
11811       continue;
11812
11813     // Check that both loads are next to each other in memory.
11814     if (!areSlicesNextToEachOther(*First, *Second))
11815       continue;
11816
11817     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
11818     --GlobalLSCost.Loads;
11819     // Move to the next pair.
11820     Second = nullptr;
11821   }
11822 }
11823
11824 /// \brief Check the profitability of all involved LoadedSlice.
11825 /// Currently, it is considered profitable if there is exactly two
11826 /// involved slices (1) which are (2) next to each other in memory, and
11827 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11828 ///
11829 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11830 /// the elements themselves.
11831 ///
11832 /// FIXME: When the cost model will be mature enough, we can relax
11833 /// constraints (1) and (2).
11834 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11835                                 const APInt &UsedBits, bool ForCodeSize) {
11836   unsigned NumberOfSlices = LoadedSlices.size();
11837   if (StressLoadSlicing)
11838     return NumberOfSlices > 1;
11839
11840   // Check (1).
11841   if (NumberOfSlices != 2)
11842     return false;
11843
11844   // Check (2).
11845   if (!areUsedBitsDense(UsedBits))
11846     return false;
11847
11848   // Check (3).
11849   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11850   // The original code has one big load.
11851   OrigCost.Loads = 1;
11852   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11853     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11854     // Accumulate the cost of all the slices.
11855     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11856     GlobalSlicingCost += SliceCost;
11857
11858     // Account as cost in the original configuration the gain obtained
11859     // with the current slices.
11860     OrigCost.addSliceGain(LS);
11861   }
11862
11863   // If the target supports paired load, adjust the cost accordingly.
11864   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11865   return OrigCost > GlobalSlicingCost;
11866 }
11867
11868 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
11869 /// operations, split it in the various pieces being extracted.
11870 ///
11871 /// This sort of thing is introduced by SROA.
11872 /// This slicing takes care not to insert overlapping loads.
11873 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
11874 bool DAGCombiner::SliceUpLoad(SDNode *N) {
11875   if (Level < AfterLegalizeDAG)
11876     return false;
11877
11878   LoadSDNode *LD = cast<LoadSDNode>(N);
11879   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
11880       !LD->getValueType(0).isInteger())
11881     return false;
11882
11883   // Keep track of already used bits to detect overlapping values.
11884   // In that case, we will just abort the transformation.
11885   APInt UsedBits(LD->getValueSizeInBits(0), 0);
11886
11887   SmallVector<LoadedSlice, 4> LoadedSlices;
11888
11889   // Check if this load is used as several smaller chunks of bits.
11890   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
11891   // of computation for each trunc.
11892   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
11893        UI != UIEnd; ++UI) {
11894     // Skip the uses of the chain.
11895     if (UI.getUse().getResNo() != 0)
11896       continue;
11897
11898     SDNode *User = *UI;
11899     unsigned Shift = 0;
11900
11901     // Check if this is a trunc(lshr).
11902     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
11903         isa<ConstantSDNode>(User->getOperand(1))) {
11904       Shift = User->getConstantOperandVal(1);
11905       User = *User->use_begin();
11906     }
11907
11908     // At this point, User is a Truncate, iff we encountered, trunc or
11909     // trunc(lshr).
11910     if (User->getOpcode() != ISD::TRUNCATE)
11911       return false;
11912
11913     // The width of the type must be a power of 2 and greater than 8-bits.
11914     // Otherwise the load cannot be represented in LLVM IR.
11915     // Moreover, if we shifted with a non-8-bits multiple, the slice
11916     // will be across several bytes. We do not support that.
11917     unsigned Width = User->getValueSizeInBits(0);
11918     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
11919       return 0;
11920
11921     // Build the slice for this chain of computations.
11922     LoadedSlice LS(User, LD, Shift, &DAG);
11923     APInt CurrentUsedBits = LS.getUsedBits();
11924
11925     // Check if this slice overlaps with another.
11926     if ((CurrentUsedBits & UsedBits) != 0)
11927       return false;
11928     // Update the bits used globally.
11929     UsedBits |= CurrentUsedBits;
11930
11931     // Check if the new slice would be legal.
11932     if (!LS.isLegal())
11933       return false;
11934
11935     // Record the slice.
11936     LoadedSlices.push_back(LS);
11937   }
11938
11939   // Abort slicing if it does not seem to be profitable.
11940   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
11941     return false;
11942
11943   ++SlicedLoads;
11944
11945   // Rewrite each chain to use an independent load.
11946   // By construction, each chain can be represented by a unique load.
11947
11948   // Prepare the argument for the new token factor for all the slices.
11949   SmallVector<SDValue, 8> ArgChains;
11950   for (SmallVectorImpl<LoadedSlice>::const_iterator
11951            LSIt = LoadedSlices.begin(),
11952            LSItEnd = LoadedSlices.end();
11953        LSIt != LSItEnd; ++LSIt) {
11954     SDValue SliceInst = LSIt->loadSlice();
11955     CombineTo(LSIt->Inst, SliceInst, true);
11956     if (SliceInst.getOpcode() != ISD::LOAD)
11957       SliceInst = SliceInst.getOperand(0);
11958     assert(SliceInst->getOpcode() == ISD::LOAD &&
11959            "It takes more than a zext to get to the loaded slice!!");
11960     ArgChains.push_back(SliceInst.getValue(1));
11961   }
11962
11963   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
11964                               ArgChains);
11965   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11966   AddToWorklist(Chain.getNode());
11967   return true;
11968 }
11969
11970 /// Check to see if V is (and load (ptr), imm), where the load is having
11971 /// specific bytes cleared out.  If so, return the byte size being masked out
11972 /// and the shift amount.
11973 static std::pair<unsigned, unsigned>
11974 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
11975   std::pair<unsigned, unsigned> Result(0, 0);
11976
11977   // Check for the structure we're looking for.
11978   if (V->getOpcode() != ISD::AND ||
11979       !isa<ConstantSDNode>(V->getOperand(1)) ||
11980       !ISD::isNormalLoad(V->getOperand(0).getNode()))
11981     return Result;
11982
11983   // Check the chain and pointer.
11984   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
11985   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
11986
11987   // The store should be chained directly to the load or be an operand of a
11988   // tokenfactor.
11989   if (LD == Chain.getNode())
11990     ; // ok.
11991   else if (Chain->getOpcode() != ISD::TokenFactor)
11992     return Result; // Fail.
11993   else {
11994     bool isOk = false;
11995     for (const SDValue &ChainOp : Chain->op_values())
11996       if (ChainOp.getNode() == LD) {
11997         isOk = true;
11998         break;
11999       }
12000     if (!isOk) return Result;
12001   }
12002
12003   // This only handles simple types.
12004   if (V.getValueType() != MVT::i16 &&
12005       V.getValueType() != MVT::i32 &&
12006       V.getValueType() != MVT::i64)
12007     return Result;
12008
12009   // Check the constant mask.  Invert it so that the bits being masked out are
12010   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
12011   // follow the sign bit for uniformity.
12012   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
12013   unsigned NotMaskLZ = countLeadingZeros(NotMask);
12014   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
12015   unsigned NotMaskTZ = countTrailingZeros(NotMask);
12016   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
12017   if (NotMaskLZ == 64) return Result;  // All zero mask.
12018
12019   // See if we have a continuous run of bits.  If so, we have 0*1+0*
12020   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
12021     return Result;
12022
12023   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
12024   if (V.getValueType() != MVT::i64 && NotMaskLZ)
12025     NotMaskLZ -= 64-V.getValueSizeInBits();
12026
12027   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
12028   switch (MaskedBytes) {
12029   case 1:
12030   case 2:
12031   case 4: break;
12032   default: return Result; // All one mask, or 5-byte mask.
12033   }
12034
12035   // Verify that the first bit starts at a multiple of mask so that the access
12036   // is aligned the same as the access width.
12037   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12038
12039   Result.first = MaskedBytes;
12040   Result.second = NotMaskTZ/8;
12041   return Result;
12042 }
12043
12044
12045 /// Check to see if IVal is something that provides a value as specified by
12046 /// MaskInfo. If so, replace the specified store with a narrower store of
12047 /// truncated IVal.
12048 static SDNode *
12049 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12050                                 SDValue IVal, StoreSDNode *St,
12051                                 DAGCombiner *DC) {
12052   unsigned NumBytes = MaskInfo.first;
12053   unsigned ByteShift = MaskInfo.second;
12054   SelectionDAG &DAG = DC->getDAG();
12055
12056   // Check to see if IVal is all zeros in the part being masked in by the 'or'
12057   // that uses this.  If not, this is not a replacement.
12058   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12059                                   ByteShift*8, (ByteShift+NumBytes)*8);
12060   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12061
12062   // Check that it is legal on the target to do this.  It is legal if the new
12063   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12064   // legalization.
12065   MVT VT = MVT::getIntegerVT(NumBytes*8);
12066   if (!DC->isTypeLegal(VT))
12067     return nullptr;
12068
12069   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12070   // shifted by ByteShift and truncated down to NumBytes.
12071   if (ByteShift) {
12072     SDLoc DL(IVal);
12073     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12074                        DAG.getConstant(ByteShift*8, DL,
12075                                     DC->getShiftAmountTy(IVal.getValueType())));
12076   }
12077
12078   // Figure out the offset for the store and the alignment of the access.
12079   unsigned StOffset;
12080   unsigned NewAlign = St->getAlignment();
12081
12082   if (DAG.getDataLayout().isLittleEndian())
12083     StOffset = ByteShift;
12084   else
12085     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12086
12087   SDValue Ptr = St->getBasePtr();
12088   if (StOffset) {
12089     SDLoc DL(IVal);
12090     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12091                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12092     NewAlign = MinAlign(NewAlign, StOffset);
12093   }
12094
12095   // Truncate down to the new size.
12096   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12097
12098   ++OpsNarrowed;
12099   return DAG
12100       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12101                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12102       .getNode();
12103 }
12104
12105
12106 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12107 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12108 /// narrowing the load and store if it would end up being a win for performance
12109 /// or code size.
12110 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
12111   StoreSDNode *ST  = cast<StoreSDNode>(N);
12112   if (ST->isVolatile())
12113     return SDValue();
12114
12115   SDValue Chain = ST->getChain();
12116   SDValue Value = ST->getValue();
12117   SDValue Ptr   = ST->getBasePtr();
12118   EVT VT = Value.getValueType();
12119
12120   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
12121     return SDValue();
12122
12123   unsigned Opc = Value.getOpcode();
12124
12125   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
12126   // is a byte mask indicating a consecutive number of bytes, check to see if
12127   // Y is known to provide just those bytes.  If so, we try to replace the
12128   // load + replace + store sequence with a single (narrower) store, which makes
12129   // the load dead.
12130   if (Opc == ISD::OR) {
12131     std::pair<unsigned, unsigned> MaskedLoad;
12132     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
12133     if (MaskedLoad.first)
12134       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12135                                                   Value.getOperand(1), ST,this))
12136         return SDValue(NewST, 0);
12137
12138     // Or is commutative, so try swapping X and Y.
12139     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
12140     if (MaskedLoad.first)
12141       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12142                                                   Value.getOperand(0), ST,this))
12143         return SDValue(NewST, 0);
12144   }
12145
12146   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
12147       Value.getOperand(1).getOpcode() != ISD::Constant)
12148     return SDValue();
12149
12150   SDValue N0 = Value.getOperand(0);
12151   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12152       Chain == SDValue(N0.getNode(), 1)) {
12153     LoadSDNode *LD = cast<LoadSDNode>(N0);
12154     if (LD->getBasePtr() != Ptr ||
12155         LD->getPointerInfo().getAddrSpace() !=
12156         ST->getPointerInfo().getAddrSpace())
12157       return SDValue();
12158
12159     // Find the type to narrow it the load / op / store to.
12160     SDValue N1 = Value.getOperand(1);
12161     unsigned BitWidth = N1.getValueSizeInBits();
12162     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
12163     if (Opc == ISD::AND)
12164       Imm ^= APInt::getAllOnesValue(BitWidth);
12165     if (Imm == 0 || Imm.isAllOnesValue())
12166       return SDValue();
12167     unsigned ShAmt = Imm.countTrailingZeros();
12168     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
12169     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
12170     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12171     // The narrowing should be profitable, the load/store operation should be
12172     // legal (or custom) and the store size should be equal to the NewVT width.
12173     while (NewBW < BitWidth &&
12174            (NewVT.getStoreSizeInBits() != NewBW ||
12175             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
12176             !TLI.isNarrowingProfitable(VT, NewVT))) {
12177       NewBW = NextPowerOf2(NewBW);
12178       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12179     }
12180     if (NewBW >= BitWidth)
12181       return SDValue();
12182
12183     // If the lsb changed does not start at the type bitwidth boundary,
12184     // start at the previous one.
12185     if (ShAmt % NewBW)
12186       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
12187     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
12188                                    std::min(BitWidth, ShAmt + NewBW));
12189     if ((Imm & Mask) == Imm) {
12190       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
12191       if (Opc == ISD::AND)
12192         NewImm ^= APInt::getAllOnesValue(NewBW);
12193       uint64_t PtrOff = ShAmt / 8;
12194       // For big endian targets, we need to adjust the offset to the pointer to
12195       // load the correct bytes.
12196       if (DAG.getDataLayout().isBigEndian())
12197         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
12198
12199       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
12200       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
12201       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
12202         return SDValue();
12203
12204       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
12205                                    Ptr.getValueType(), Ptr,
12206                                    DAG.getConstant(PtrOff, SDLoc(LD),
12207                                                    Ptr.getValueType()));
12208       SDValue NewLD =
12209           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
12210                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12211                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
12212       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
12213                                    DAG.getConstant(NewImm, SDLoc(Value),
12214                                                    NewVT));
12215       SDValue NewST =
12216           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
12217                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
12218
12219       AddToWorklist(NewPtr.getNode());
12220       AddToWorklist(NewLD.getNode());
12221       AddToWorklist(NewVal.getNode());
12222       WorklistRemover DeadNodes(*this);
12223       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
12224       ++OpsNarrowed;
12225       return NewST;
12226     }
12227   }
12228
12229   return SDValue();
12230 }
12231
12232 /// For a given floating point load / store pair, if the load value isn't used
12233 /// by any other operations, then consider transforming the pair to integer
12234 /// load / store operations if the target deems the transformation profitable.
12235 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12236   StoreSDNode *ST  = cast<StoreSDNode>(N);
12237   SDValue Chain = ST->getChain();
12238   SDValue Value = ST->getValue();
12239   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12240       Value.hasOneUse() &&
12241       Chain == SDValue(Value.getNode(), 1)) {
12242     LoadSDNode *LD = cast<LoadSDNode>(Value);
12243     EVT VT = LD->getMemoryVT();
12244     if (!VT.isFloatingPoint() ||
12245         VT != ST->getMemoryVT() ||
12246         LD->isNonTemporal() ||
12247         ST->isNonTemporal() ||
12248         LD->getPointerInfo().getAddrSpace() != 0 ||
12249         ST->getPointerInfo().getAddrSpace() != 0)
12250       return SDValue();
12251
12252     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12253     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12254         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12255         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12256         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12257       return SDValue();
12258
12259     unsigned LDAlign = LD->getAlignment();
12260     unsigned STAlign = ST->getAlignment();
12261     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12262     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12263     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12264       return SDValue();
12265
12266     SDValue NewLD =
12267         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12268                     LD->getPointerInfo(), LDAlign);
12269
12270     SDValue NewST =
12271         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12272                      ST->getPointerInfo(), STAlign);
12273
12274     AddToWorklist(NewLD.getNode());
12275     AddToWorklist(NewST.getNode());
12276     WorklistRemover DeadNodes(*this);
12277     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12278     ++LdStFP2Int;
12279     return NewST;
12280   }
12281
12282   return SDValue();
12283 }
12284
12285 // This is a helper function for visitMUL to check the profitability
12286 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12287 // MulNode is the original multiply, AddNode is (add x, c1),
12288 // and ConstNode is c2.
12289 //
12290 // If the (add x, c1) has multiple uses, we could increase
12291 // the number of adds if we make this transformation.
12292 // It would only be worth doing this if we can remove a
12293 // multiply in the process. Check for that here.
12294 // To illustrate:
12295 //     (A + c1) * c3
12296 //     (A + c2) * c3
12297 // We're checking for cases where we have common "c3 * A" expressions.
12298 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12299                                               SDValue &AddNode,
12300                                               SDValue &ConstNode) {
12301   APInt Val;
12302
12303   // If the add only has one use, this would be OK to do.
12304   if (AddNode.getNode()->hasOneUse())
12305     return true;
12306
12307   // Walk all the users of the constant with which we're multiplying.
12308   for (SDNode *Use : ConstNode->uses()) {
12309
12310     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12311       continue;
12312
12313     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12314       SDNode *OtherOp;
12315       SDNode *MulVar = AddNode.getOperand(0).getNode();
12316
12317       // OtherOp is what we're multiplying against the constant.
12318       if (Use->getOperand(0) == ConstNode)
12319         OtherOp = Use->getOperand(1).getNode();
12320       else
12321         OtherOp = Use->getOperand(0).getNode();
12322
12323       // Check to see if multiply is with the same operand of our "add".
12324       //
12325       //     ConstNode  = CONST
12326       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12327       //     ...
12328       //     AddNode  = (A + c1)  <-- MulVar is A.
12329       //         = AddNode * ConstNode   <-- current visiting instruction.
12330       //
12331       // If we make this transformation, we will have a common
12332       // multiply (ConstNode * A) that we can save.
12333       if (OtherOp == MulVar)
12334         return true;
12335
12336       // Now check to see if a future expansion will give us a common
12337       // multiply.
12338       //
12339       //     ConstNode  = CONST
12340       //     AddNode    = (A + c1)
12341       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12342       //     ...
12343       //     OtherOp = (A + c2)
12344       //     Use     = OtherOp * ConstNode <-- visiting Use.
12345       //
12346       // If we make this transformation, we will have a common
12347       // multiply (CONST * A) after we also do the same transformation
12348       // to the "t2" instruction.
12349       if (OtherOp->getOpcode() == ISD::ADD &&
12350           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12351           OtherOp->getOperand(0).getNode() == MulVar)
12352         return true;
12353     }
12354   }
12355
12356   // Didn't find a case where this would be profitable.
12357   return false;
12358 }
12359
12360 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12361                                          unsigned NumStores) {
12362   SmallVector<SDValue, 8> Chains;
12363   SmallPtrSet<const SDNode *, 8> Visited;
12364   SDLoc StoreDL(StoreNodes[0].MemNode);
12365
12366   for (unsigned i = 0; i < NumStores; ++i) {
12367     Visited.insert(StoreNodes[i].MemNode);
12368   }
12369
12370   // don't include nodes that are children
12371   for (unsigned i = 0; i < NumStores; ++i) {
12372     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12373       Chains.push_back(StoreNodes[i].MemNode->getChain());
12374   }
12375
12376   assert(Chains.size() > 0 && "Chain should have generated a chain");
12377   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12378 }
12379
12380 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12381                   SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
12382                   unsigned NumStores, bool IsConstantSrc, bool UseVector) {
12383   // Make sure we have something to merge.
12384   if (NumStores < 2)
12385     return false;
12386
12387   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12388
12389   // The latest Node in the DAG.
12390   SDLoc DL(StoreNodes[0].MemNode);
12391
12392   SDValue StoredVal;
12393   if (UseVector) {
12394     bool IsVec = MemVT.isVector();
12395     unsigned Elts = NumStores;
12396     if (IsVec) {
12397       // When merging vector stores, get the total number of elements.
12398       Elts *= MemVT.getVectorNumElements();
12399     }
12400     // Get the type for the merged vector store.
12401     EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12402     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
12403
12404     if (IsConstantSrc) {
12405       SmallVector<SDValue, 8> BuildVector;
12406       for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
12407         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12408         SDValue Val = St->getValue();
12409         if (MemVT.getScalarType().isInteger())
12410           if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
12411             Val = DAG.getConstant(
12412                 (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
12413                 SDLoc(CFP), MemVT);
12414         BuildVector.push_back(Val);
12415       }
12416       StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
12417     } else {
12418       SmallVector<SDValue, 8> Ops;
12419       for (unsigned i = 0; i < NumStores; ++i) {
12420         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12421         SDValue Val = St->getValue();
12422         // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
12423         if (Val.getValueType() != MemVT)
12424           return false;
12425         Ops.push_back(Val);
12426       }
12427
12428       // Build the extracted vector elements back into a vector.
12429       StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
12430                               DL, Ty, Ops);    }
12431   } else {
12432     // We should always use a vector store when merging extracted vector
12433     // elements, so this path implies a store of constants.
12434     assert(IsConstantSrc && "Merged vector elements should use vector store");
12435
12436     unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
12437     APInt StoreInt(SizeInBits, 0);
12438
12439     // Construct a single integer constant which is made of the smaller
12440     // constant inputs.
12441     bool IsLE = DAG.getDataLayout().isLittleEndian();
12442     for (unsigned i = 0; i < NumStores; ++i) {
12443       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12444       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12445
12446       SDValue Val = St->getValue();
12447       StoreInt <<= ElementSizeBytes * 8;
12448       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12449         StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
12450       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12451         StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
12452       } else {
12453         llvm_unreachable("Invalid constant element type");
12454       }
12455     }
12456
12457     // Create the new Load and Store operations.
12458     EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12459     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12460   }
12461
12462   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12463   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
12464
12465   // make sure we use trunc store if it's necessary to be legal.
12466   SDValue NewStore;
12467   if (TLI.isTypeLegal(StoredVal.getValueType())) {
12468     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
12469                             FirstInChain->getPointerInfo(),
12470                             FirstInChain->getAlignment());
12471   } else { // Must be realized as a trunc store
12472     EVT LegalizedStoredValueTy =
12473         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
12474     unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
12475     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
12476     SDValue ExtendedStoreVal =
12477         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
12478                         LegalizedStoredValueTy);
12479     NewStore = DAG.getTruncStore(
12480         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
12481         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
12482         FirstInChain->getAlignment(),
12483         FirstInChain->getMemOperand()->getFlags());
12484   }
12485
12486   // Replace all merged stores with the new store.
12487   for (unsigned i = 0; i < NumStores; ++i)
12488     CombineTo(StoreNodes[i].MemNode, NewStore);
12489
12490   AddToWorklist(NewChain.getNode());
12491   return true;
12492 }
12493
12494 void DAGCombiner::getStoreMergeCandidates(
12495     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12496   // This holds the base pointer, index, and the offset in bytes from the base
12497   // pointer.
12498   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12499   EVT MemVT = St->getMemoryVT();
12500
12501   // We must have a base and an offset.
12502   if (!BasePtr.Base.getNode())
12503     return;
12504
12505   // Do not handle stores to undef base pointers.
12506   if (BasePtr.Base.isUndef())
12507     return;
12508
12509   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
12510                        isa<ConstantFPSDNode>(St->getValue());
12511   bool IsExtractVecSrc =
12512       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12513        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12514   bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
12515   BaseIndexOffset LBasePtr;
12516   // Match on loadbaseptr if relevant.
12517   if (IsLoadSrc)
12518     LBasePtr = BaseIndexOffset::match(
12519         cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
12520
12521   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
12522     if (Other->isVolatile() || Other->isIndexed())
12523       return false;
12524     // We can merge constant floats to equivalent integers
12525     if (Other->getMemoryVT() != MemVT)
12526       if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
12527             isa<ConstantFPSDNode>(Other->getValue())))
12528         return false;
12529     if (IsLoadSrc) {
12530       // The Load's Base Ptr must also match
12531       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
12532         auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
12533         if (!(LBasePtr.equalBaseIndex(LPtr)))
12534           return false;
12535       } else
12536         return false;
12537     }
12538     if (IsConstantSrc)
12539       if (!(isa<ConstantSDNode>(Other->getValue()) ||
12540             isa<ConstantFPSDNode>(Other->getValue())))
12541         return false;
12542     if (IsExtractVecSrc)
12543       if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12544             Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
12545         return false;
12546     Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
12547     return (Ptr.equalBaseIndex(BasePtr));
12548   };
12549   // We looking for a root node which is an ancestor to all mergable
12550   // stores. We search up through a load, to our root and then down
12551   // through all children. For instance we will find Store{1,2,3} if
12552   // St is Store1, Store2. or Store3 where the root is not a load
12553   // which always true for nonvolatile ops. TODO: Expand
12554   // the search to find all valid candidates through multiple layers of loads.
12555   //
12556   // Root
12557   // |-------|-------|
12558   // Load    Load    Store3
12559   // |       |
12560   // Store1   Store2
12561   //
12562   // FIXME: We should be able to climb and
12563   // descend TokenFactors to find candidates as well.
12564
12565   SDNode *RootNode = (St->getChain()).getNode();
12566
12567   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12568     RootNode = Ldn->getChain().getNode();
12569     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12570       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12571         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
12572           if (I2.getOperandNo() == 0)
12573             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
12574               BaseIndexOffset Ptr;
12575               if (CandidateMatch(OtherST, Ptr))
12576                 StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
12577             }
12578   } else
12579     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12580       if (I.getOperandNo() == 0)
12581         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12582           BaseIndexOffset Ptr;
12583           if (CandidateMatch(OtherST, Ptr))
12584             StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
12585         }
12586 }
12587
12588 // We need to check that merging these stores does not cause a loop
12589 // in the DAG. Any store candidate may depend on another candidate
12590 // indirectly through its operand (we already consider dependencies
12591 // through the chain). Check in parallel by searching up from
12592 // non-chain operands of candidates.
12593 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12594     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12595   SmallPtrSet<const SDNode *, 16> Visited;
12596   SmallVector<const SDNode *, 8> Worklist;
12597   // search ops of store candidates
12598   for (unsigned i = 0; i < NumStores; ++i) {
12599     SDNode *n = StoreNodes[i].MemNode;
12600     // Potential loops may happen only through non-chain operands
12601     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12602       Worklist.push_back(n->getOperand(j).getNode());
12603   }
12604   // search through DAG. We can stop early if we find a storenode
12605   for (unsigned i = 0; i < NumStores; ++i) {
12606     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12607       return false;
12608   }
12609   return true;
12610 }
12611
12612 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12613   if (OptLevel == CodeGenOpt::None)
12614     return false;
12615
12616   EVT MemVT = St->getMemoryVT();
12617   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12618
12619   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12620     return false;
12621
12622   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12623       Attribute::NoImplicitFloat);
12624
12625   // This function cannot currently deal with non-byte-sized memory sizes.
12626   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12627     return false;
12628
12629   if (!MemVT.isSimple())
12630     return false;
12631
12632   // Perform an early exit check. Do not bother looking at stored values that
12633   // are not constants, loads, or extracted vector elements.
12634   SDValue StoredVal = St->getValue();
12635   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12636   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12637                        isa<ConstantFPSDNode>(StoredVal);
12638   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12639                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12640
12641   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12642     return false;
12643
12644   // Don't merge vectors into wider vectors if the source data comes from loads.
12645   // TODO: This restriction can be lifted by using logic similar to the
12646   // ExtractVecSrc case.
12647   if (MemVT.isVector() && IsLoadSrc)
12648     return false;
12649
12650   SmallVector<MemOpLink, 8> StoreNodes;
12651   // Find potential store merge candidates by searching through chain sub-DAG
12652   getStoreMergeCandidates(St, StoreNodes);
12653
12654   // Check if there is anything to merge.
12655   if (StoreNodes.size() < 2)
12656     return false;
12657
12658   // Sort the memory operands according to their distance from the
12659   // base pointer.
12660   std::sort(StoreNodes.begin(), StoreNodes.end(),
12661             [](MemOpLink LHS, MemOpLink RHS) {
12662               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12663             });
12664
12665   // Store Merge attempts to merge the lowest stores. This generally
12666   // works out as if successful, as the remaining stores are checked
12667   // after the first collection of stores is merged. However, in the
12668   // case that a non-mergeable store is found first, e.g., {p[-2],
12669   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
12670   // mergeable cases. To prevent this, we prune such stores from the
12671   // front of StoreNodes here.
12672
12673   bool RV = false;
12674   while (StoreNodes.size() > 1) {
12675     unsigned StartIdx = 0;
12676     while ((StartIdx + 1 < StoreNodes.size()) &&
12677            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
12678                StoreNodes[StartIdx + 1].OffsetFromBase)
12679       ++StartIdx;
12680
12681     // Bail if we don't have enough candidates to merge.
12682     if (StartIdx + 1 >= StoreNodes.size())
12683       return RV;
12684
12685     if (StartIdx)
12686       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
12687
12688     // Scan the memory operations on the chain and find the first
12689     // non-consecutive store memory address.
12690     unsigned NumConsecutiveStores = 1;
12691     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12692     // Check that the addresses are consecutive starting from the second
12693     // element in the list of stores.
12694     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12695       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12696       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12697         break;
12698       NumConsecutiveStores = i + 1;
12699     }
12700
12701     if (NumConsecutiveStores < 2) {
12702       StoreNodes.erase(StoreNodes.begin(),
12703                        StoreNodes.begin() + NumConsecutiveStores);
12704       continue;
12705     }
12706
12707     // Check that we can merge these candidates without causing a cycle
12708     if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
12709                                                   NumConsecutiveStores)) {
12710       StoreNodes.erase(StoreNodes.begin(),
12711                        StoreNodes.begin() + NumConsecutiveStores);
12712       continue;
12713     }
12714
12715     // The node with the lowest store address.
12716     LLVMContext &Context = *DAG.getContext();
12717     const DataLayout &DL = DAG.getDataLayout();
12718
12719     // Store the constants into memory as one consecutive store.
12720     if (IsConstantSrc) {
12721       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12722       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12723       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12724       unsigned LastLegalType = 0;
12725       unsigned LastLegalVectorType = 0;
12726       bool NonZero = false;
12727       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12728         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12729         SDValue StoredVal = ST->getValue();
12730
12731         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
12732           NonZero |= !C->isNullValue();
12733         } else if (ConstantFPSDNode *C =
12734                        dyn_cast<ConstantFPSDNode>(StoredVal)) {
12735           NonZero |= !C->getConstantFPValue()->isNullValue();
12736         } else {
12737           // Non-constant.
12738           break;
12739         }
12740
12741         // Find a legal type for the constant store.
12742         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12743         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12744         bool IsFast = false;
12745         if (TLI.isTypeLegal(StoreTy) &&
12746             TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
12747             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12748                                    FirstStoreAlign, &IsFast) &&
12749             IsFast) {
12750           LastLegalType = i + 1;
12751           // Or check whether a truncstore is legal.
12752         } else if (TLI.getTypeAction(Context, StoreTy) ==
12753                    TargetLowering::TypePromoteInteger) {
12754           EVT LegalizedStoredValueTy =
12755               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12756           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12757               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
12758               TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12759                                      FirstStoreAS, FirstStoreAlign, &IsFast) &&
12760               IsFast) {
12761             LastLegalType = i + 1;
12762           }
12763         }
12764
12765         // We only use vectors if the constant is known to be zero or the target
12766         // allows it and the function is not marked with the noimplicitfloat
12767         // attribute.
12768         if ((!NonZero ||
12769              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12770             !NoVectors) {
12771           // Find a legal type for the vector store.
12772           EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
12773           if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
12774               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12775                                      FirstStoreAlign, &IsFast) &&
12776               IsFast)
12777             LastLegalVectorType = i + 1;
12778         }
12779       }
12780
12781       // Check if we found a legal integer type that creates a meaningful merge.
12782       if (LastLegalType < 2 && LastLegalVectorType < 2) {
12783         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12784         continue;
12785       }
12786
12787       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12788       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12789
12790       bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
12791                                                     true, UseVector);
12792       if (!Merged) {
12793         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12794         continue;
12795       }
12796       // Remove merged stores for next iteration.
12797       RV = true;
12798       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12799       continue;
12800     }
12801
12802     // When extracting multiple vector elements, try to store them
12803     // in one vector store rather than a sequence of scalar stores.
12804     if (IsExtractVecSrc) {
12805       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12806       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12807       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12808       unsigned NumStoresToMerge = 1;
12809       bool IsVec = MemVT.isVector();
12810       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12811         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12812         unsigned StoreValOpcode = St->getValue().getOpcode();
12813         // This restriction could be loosened.
12814         // Bail out if any stored values are not elements extracted from a
12815         // vector. It should be possible to handle mixed sources, but load
12816         // sources need more careful handling (see the block of code below that
12817         // handles consecutive loads).
12818         if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
12819             StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
12820           return RV;
12821
12822         // Find a legal type for the vector store.
12823         unsigned Elts = i + 1;
12824         if (IsVec) {
12825           // When merging vector stores, get the total number of elements.
12826           Elts *= MemVT.getVectorNumElements();
12827         }
12828         EVT Ty =
12829             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12830         bool IsFast;
12831         if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
12832             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12833                                    FirstStoreAlign, &IsFast) &&
12834             IsFast)
12835           NumStoresToMerge = i + 1;
12836       }
12837
12838       bool Merged = MergeStoresOfConstantsOrVecElts(
12839           StoreNodes, MemVT, NumStoresToMerge, false, true);
12840       if (!Merged) {
12841         StoreNodes.erase(StoreNodes.begin(),
12842                          StoreNodes.begin() + NumStoresToMerge);
12843         continue;
12844       }
12845       // Remove merged stores for next iteration.
12846       StoreNodes.erase(StoreNodes.begin(),
12847                        StoreNodes.begin() + NumStoresToMerge);
12848       RV = true;
12849       continue;
12850     }
12851
12852     // Below we handle the case of multiple consecutive stores that
12853     // come from multiple consecutive loads. We merge them into a single
12854     // wide load and a single wide store.
12855
12856     // Look for load nodes which are used by the stored values.
12857     SmallVector<MemOpLink, 8> LoadNodes;
12858
12859     // Find acceptable loads. Loads need to have the same chain (token factor),
12860     // must not be zext, volatile, indexed, and they must be consecutive.
12861     BaseIndexOffset LdBasePtr;
12862     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12863       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12864       LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
12865       if (!Ld)
12866         break;
12867
12868       // Loads must only have one use.
12869       if (!Ld->hasNUsesOfValue(1, 0))
12870         break;
12871
12872       // The memory operands must not be volatile.
12873       if (Ld->isVolatile() || Ld->isIndexed())
12874         break;
12875
12876       // We do not accept ext loads.
12877       if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
12878         break;
12879
12880       // The stored memory type must be the same.
12881       if (Ld->getMemoryVT() != MemVT)
12882         break;
12883
12884       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12885       // If this is not the first ptr that we check.
12886       if (LdBasePtr.Base.getNode()) {
12887         // The base ptr must be the same.
12888         if (!LdPtr.equalBaseIndex(LdBasePtr))
12889           break;
12890       } else {
12891         // Check that all other base pointers are the same as this one.
12892         LdBasePtr = LdPtr;
12893       }
12894
12895       // We found a potential memory operand to merge.
12896       LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
12897     }
12898
12899     if (LoadNodes.size() < 2) {
12900       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12901       continue;
12902     }
12903
12904     // If we have load/store pair instructions and we only have two values,
12905     // don't bother merging.
12906     unsigned RequiredAlignment;
12907     if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
12908         StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
12909       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
12910       continue;
12911     }
12912     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12913     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12914     unsigned FirstStoreAlign = FirstInChain->getAlignment();
12915     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
12916     unsigned FirstLoadAS = FirstLoad->getAddressSpace();
12917     unsigned FirstLoadAlign = FirstLoad->getAlignment();
12918
12919     // Scan the memory operations on the chain and find the first
12920     // non-consecutive load memory address. These variables hold the index in
12921     // the store node array.
12922     unsigned LastConsecutiveLoad = 0;
12923     // This variable refers to the size and not index in the array.
12924     unsigned LastLegalVectorType = 0;
12925     unsigned LastLegalIntegerType = 0;
12926     StartAddress = LoadNodes[0].OffsetFromBase;
12927     SDValue FirstChain = FirstLoad->getChain();
12928     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
12929       // All loads must share the same chain.
12930       if (LoadNodes[i].MemNode->getChain() != FirstChain)
12931         break;
12932
12933       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
12934       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12935         break;
12936       LastConsecutiveLoad = i;
12937       // Find a legal type for the vector store.
12938       EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
12939       bool IsFastSt, IsFastLd;
12940       if (TLI.isTypeLegal(StoreTy) &&
12941           TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
12942           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12943                                  FirstStoreAlign, &IsFastSt) &&
12944           IsFastSt &&
12945           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12946                                  FirstLoadAlign, &IsFastLd) &&
12947           IsFastLd) {
12948         LastLegalVectorType = i + 1;
12949       }
12950
12951       // Find a legal type for the integer store.
12952       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12953       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12954       if (TLI.isTypeLegal(StoreTy) &&
12955           TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
12956           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12957                                  FirstStoreAlign, &IsFastSt) &&
12958           IsFastSt &&
12959           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12960                                  FirstLoadAlign, &IsFastLd) &&
12961           IsFastLd)
12962         LastLegalIntegerType = i + 1;
12963       // Or check whether a truncstore and extload is legal.
12964       else if (TLI.getTypeAction(Context, StoreTy) ==
12965                TargetLowering::TypePromoteInteger) {
12966         EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
12967         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12968             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
12969             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
12970                                StoreTy) &&
12971             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
12972                                StoreTy) &&
12973             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12974             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12975                                    FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
12976             IsFastSt &&
12977             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12978                                    FirstLoadAlign, &IsFastLd) &&
12979             IsFastLd)
12980           LastLegalIntegerType = i + 1;
12981       }
12982     }
12983
12984     // Only use vector types if the vector type is larger than the integer type.
12985     // If they are the same, use integers.
12986     bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
12987     unsigned LastLegalType =
12988         std::max(LastLegalVectorType, LastLegalIntegerType);
12989
12990     // We add +1 here because the LastXXX variables refer to location while
12991     // the NumElem refers to array/index size.
12992     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
12993     NumElem = std::min(LastLegalType, NumElem);
12994
12995     if (NumElem < 2) {
12996       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12997       continue;
12998     }
12999
13000     // Find if it is better to use vectors or integers to load and store
13001     // to memory.
13002     EVT JointMemOpVT;
13003     if (UseVectorTy) {
13004       JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
13005     } else {
13006       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13007       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13008     }
13009
13010     SDLoc LoadDL(LoadNodes[0].MemNode);
13011     SDLoc StoreDL(StoreNodes[0].MemNode);
13012
13013     // The merged loads are required to have the same incoming chain, so
13014     // using the first's chain is acceptable.
13015     SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13016                                   FirstLoad->getBasePtr(),
13017                                   FirstLoad->getPointerInfo(), FirstLoadAlign);
13018
13019     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13020
13021     AddToWorklist(NewStoreChain.getNode());
13022
13023     SDValue NewStore = DAG.getStore(
13024         NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
13025         FirstInChain->getPointerInfo(), FirstStoreAlign);
13026
13027     // Transfer chain users from old loads to the new load.
13028     for (unsigned i = 0; i < NumElem; ++i) {
13029       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13030       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13031                                     SDValue(NewLoad.getNode(), 1));
13032     }
13033
13034     // Replace the all stores with the new store.
13035     for (unsigned i = 0; i < NumElem; ++i)
13036       CombineTo(StoreNodes[i].MemNode, NewStore);
13037     RV = true;
13038     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13039     continue;
13040   }
13041   return RV;
13042 }
13043
13044 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13045   SDLoc SL(ST);
13046   SDValue ReplStore;
13047
13048   // Replace the chain to avoid dependency.
13049   if (ST->isTruncatingStore()) {
13050     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13051                                   ST->getBasePtr(), ST->getMemoryVT(),
13052                                   ST->getMemOperand());
13053   } else {
13054     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13055                              ST->getMemOperand());
13056   }
13057
13058   // Create token to keep both nodes around.
13059   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13060                               MVT::Other, ST->getChain(), ReplStore);
13061
13062   // Make sure the new and old chains are cleaned up.
13063   AddToWorklist(Token.getNode());
13064
13065   // Don't add users to work list.
13066   return CombineTo(ST, Token, false);
13067 }
13068
13069 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13070   SDValue Value = ST->getValue();
13071   if (Value.getOpcode() == ISD::TargetConstantFP)
13072     return SDValue();
13073
13074   SDLoc DL(ST);
13075
13076   SDValue Chain = ST->getChain();
13077   SDValue Ptr = ST->getBasePtr();
13078
13079   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13080
13081   // NOTE: If the original store is volatile, this transform must not increase
13082   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13083   // processor operation but an i64 (which is not legal) requires two.  So the
13084   // transform should not be done in this case.
13085
13086   SDValue Tmp;
13087   switch (CFP->getSimpleValueType(0).SimpleTy) {
13088   default:
13089     llvm_unreachable("Unknown FP type");
13090   case MVT::f16:    // We don't do this for these yet.
13091   case MVT::f80:
13092   case MVT::f128:
13093   case MVT::ppcf128:
13094     return SDValue();
13095   case MVT::f32:
13096     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13097         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13098       ;
13099       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13100                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13101                             MVT::i32);
13102       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13103     }
13104
13105     return SDValue();
13106   case MVT::f64:
13107     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13108          !ST->isVolatile()) ||
13109         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13110       ;
13111       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13112                             getZExtValue(), SDLoc(CFP), MVT::i64);
13113       return DAG.getStore(Chain, DL, Tmp,
13114                           Ptr, ST->getMemOperand());
13115     }
13116
13117     if (!ST->isVolatile() &&
13118         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13119       // Many FP stores are not made apparent until after legalize, e.g. for
13120       // argument passing.  Since this is so common, custom legalize the
13121       // 64-bit integer store into two 32-bit stores.
13122       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13123       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13124       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13125       if (DAG.getDataLayout().isBigEndian())
13126         std::swap(Lo, Hi);
13127
13128       unsigned Alignment = ST->getAlignment();
13129       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13130       AAMDNodes AAInfo = ST->getAAInfo();
13131
13132       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13133                                  ST->getAlignment(), MMOFlags, AAInfo);
13134       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13135                         DAG.getConstant(4, DL, Ptr.getValueType()));
13136       Alignment = MinAlign(Alignment, 4U);
13137       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13138                                  ST->getPointerInfo().getWithOffset(4),
13139                                  Alignment, MMOFlags, AAInfo);
13140       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13141                          St0, St1);
13142     }
13143
13144     return SDValue();
13145   }
13146 }
13147
13148 SDValue DAGCombiner::visitSTORE(SDNode *N) {
13149   StoreSDNode *ST  = cast<StoreSDNode>(N);
13150   SDValue Chain = ST->getChain();
13151   SDValue Value = ST->getValue();
13152   SDValue Ptr   = ST->getBasePtr();
13153
13154   // If this is a store of a bit convert, store the input value if the
13155   // resultant store does not need a higher alignment than the original.
13156   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
13157       ST->isUnindexed()) {
13158     EVT SVT = Value.getOperand(0).getValueType();
13159     if (((!LegalOperations && !ST->isVolatile()) ||
13160          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
13161         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
13162       unsigned OrigAlign = ST->getAlignment();
13163       bool Fast = false;
13164       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
13165                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
13166           Fast) {
13167         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13168                             ST->getPointerInfo(), OrigAlign,
13169                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
13170       }
13171     }
13172   }
13173
13174   // Turn 'store undef, Ptr' -> nothing.
13175   if (Value.isUndef() && ST->isUnindexed())
13176     return Chain;
13177
13178   // Try to infer better alignment information than the store already has.
13179   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
13180     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13181       if (Align > ST->getAlignment()) {
13182         SDValue NewStore =
13183             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
13184                               ST->getMemoryVT(), Align,
13185                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
13186         if (NewStore.getNode() != N)
13187           return CombineTo(ST, NewStore, true);
13188       }
13189     }
13190   }
13191
13192   // Try transforming a pair floating point load / store ops to integer
13193   // load / store ops.
13194   if (SDValue NewST = TransformFPLoadStorePair(N))
13195     return NewST;
13196
13197   if (ST->isUnindexed()) {
13198     // Walk up chain skipping non-aliasing memory nodes, on this store and any
13199     // adjacent stores.
13200     if (findBetterNeighborChains(ST)) {
13201       // replaceStoreChain uses CombineTo, which handled all of the worklist
13202       // manipulation. Return the original node to not do anything else.
13203       return SDValue(ST, 0);
13204     }
13205     Chain = ST->getChain();
13206   }
13207
13208   // FIXME: is there such a thing as a truncating indexed store?
13209   if (ST->isTruncatingStore() && ST->isUnindexed() &&
13210       Value.getValueType().isInteger()) {
13211     // See if we can simplify the input to this truncstore with knowledge that
13212     // only the low bits are being used.  For example:
13213     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
13214     SDValue Shorter = GetDemandedBits(
13215         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13216                                     ST->getMemoryVT().getScalarSizeInBits()));
13217     AddToWorklist(Value.getNode());
13218     if (Shorter.getNode())
13219       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
13220                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
13221
13222     // Otherwise, see if we can simplify the operation with
13223     // SimplifyDemandedBits, which only works if the value has a single use.
13224     if (SimplifyDemandedBits(
13225             Value,
13226             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13227                                  ST->getMemoryVT().getScalarSizeInBits()))) {
13228       // Re-visit the store if anything changed and the store hasn't been merged
13229       // with another node (N is deleted) SimplifyDemandedBits will add Value's
13230       // node back to the worklist if necessary, but we also need to re-visit
13231       // the Store node itself.
13232       if (N->getOpcode() != ISD::DELETED_NODE)
13233         AddToWorklist(N);
13234       return SDValue(N, 0);
13235     }
13236   }
13237
13238   // If this is a load followed by a store to the same location, then the store
13239   // is dead/noop.
13240   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
13241     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
13242         ST->isUnindexed() && !ST->isVolatile() &&
13243         // There can't be any side effects between the load and store, such as
13244         // a call or store.
13245         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
13246       // The store is dead, remove it.
13247       return Chain;
13248     }
13249   }
13250
13251   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
13252     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
13253         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
13254         ST->getMemoryVT() == ST1->getMemoryVT()) {
13255       // If this is a store followed by a store with the same value to the same
13256       // location, then the store is dead/noop.
13257       if (ST1->getValue() == Value) {
13258         // The store is dead, remove it.
13259         return Chain;
13260       }
13261
13262       // If this is a store who's preceeding store to the same location
13263       // and no one other node is chained to that store we can effectively
13264       // drop the store. Do not remove stores to undef as they may be used as
13265       // data sinks.
13266       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
13267           !ST1->getBasePtr().isUndef()) {
13268         // ST1 is fully overwritten and can be elided. Combine with it's chain
13269         // value.
13270         CombineTo(ST1, ST1->getChain());
13271         return SDValue();
13272       }
13273     }
13274   }
13275
13276   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
13277   // truncating store.  We can do this even if this is already a truncstore.
13278   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
13279       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
13280       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
13281                             ST->getMemoryVT())) {
13282     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
13283                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
13284   }
13285
13286   // Only perform this optimization before the types are legal, because we
13287   // don't want to perform this optimization on every DAGCombine invocation.
13288   if (!LegalTypes) {
13289     for (;;) {
13290       // There can be multiple store sequences on the same chain.
13291       // Keep trying to merge store sequences until we are unable to do so
13292       // or until we merge the last store on the chain.
13293       bool Changed = MergeConsecutiveStores(ST);
13294       if (!Changed) break;
13295       // Return N as merge only uses CombineTo and no worklist clean
13296       // up is necessary.
13297       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
13298         return SDValue(N, 0);
13299     }
13300   }
13301
13302   // Try transforming N to an indexed store.
13303   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13304     return SDValue(N, 0);
13305
13306   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
13307   //
13308   // Make sure to do this only after attempting to merge stores in order to
13309   //  avoid changing the types of some subset of stores due to visit order,
13310   //  preventing their merging.
13311   if (isa<ConstantFPSDNode>(ST->getValue())) {
13312     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
13313       return NewSt;
13314   }
13315
13316   if (SDValue NewSt = splitMergedValStore(ST))
13317     return NewSt;
13318
13319   return ReduceLoadOpStoreWidth(N);
13320 }
13321
13322 /// For the instruction sequence of store below, F and I values
13323 /// are bundled together as an i64 value before being stored into memory.
13324 /// Sometimes it is more efficent to generate separate stores for F and I,
13325 /// which can remove the bitwise instructions or sink them to colder places.
13326 ///
13327 ///   (store (or (zext (bitcast F to i32) to i64),
13328 ///              (shl (zext I to i64), 32)), addr)  -->
13329 ///   (store F, addr) and (store I, addr+4)
13330 ///
13331 /// Similarly, splitting for other merged store can also be beneficial, like:
13332 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13333 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13334 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13335 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13336 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13337 ///
13338 /// We allow each target to determine specifically which kind of splitting is
13339 /// supported.
13340 ///
13341 /// The store patterns are commonly seen from the simple code snippet below
13342 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
13343 ///   void goo(const std::pair<int, float> &);
13344 ///   hoo() {
13345 ///     ...
13346 ///     goo(std::make_pair(tmp, ftmp));
13347 ///     ...
13348 ///   }
13349 ///
13350 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13351   if (OptLevel == CodeGenOpt::None)
13352     return SDValue();
13353
13354   SDValue Val = ST->getValue();
13355   SDLoc DL(ST);
13356
13357   // Match OR operand.
13358   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13359     return SDValue();
13360
13361   // Match SHL operand and get Lower and Higher parts of Val.
13362   SDValue Op1 = Val.getOperand(0);
13363   SDValue Op2 = Val.getOperand(1);
13364   SDValue Lo, Hi;
13365   if (Op1.getOpcode() != ISD::SHL) {
13366     std::swap(Op1, Op2);
13367     if (Op1.getOpcode() != ISD::SHL)
13368       return SDValue();
13369   }
13370   Lo = Op2;
13371   Hi = Op1.getOperand(0);
13372   if (!Op1.hasOneUse())
13373     return SDValue();
13374
13375   // Match shift amount to HalfValBitSize.
13376   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13377   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13378   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13379     return SDValue();
13380
13381   // Lo and Hi are zero-extended from int with size less equal than 32
13382   // to i64.
13383   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13384       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13385       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13386       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13387       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13388       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13389     return SDValue();
13390
13391   // Use the EVT of low and high parts before bitcast as the input
13392   // of target query.
13393   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13394                   ? Lo.getOperand(0).getValueType()
13395                   : Lo.getValueType();
13396   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13397                    ? Hi.getOperand(0).getValueType()
13398                    : Hi.getValueType();
13399   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13400     return SDValue();
13401
13402   // Start to split store.
13403   unsigned Alignment = ST->getAlignment();
13404   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13405   AAMDNodes AAInfo = ST->getAAInfo();
13406
13407   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13408   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13409   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13410   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13411
13412   SDValue Chain = ST->getChain();
13413   SDValue Ptr = ST->getBasePtr();
13414   // Lower value store.
13415   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13416                              ST->getAlignment(), MMOFlags, AAInfo);
13417   Ptr =
13418       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13419                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13420   // Higher value store.
13421   SDValue St1 =
13422       DAG.getStore(St0, DL, Hi, Ptr,
13423                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13424                    Alignment / 2, MMOFlags, AAInfo);
13425   return St1;
13426 }
13427
13428 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13429   SDValue InVec = N->getOperand(0);
13430   SDValue InVal = N->getOperand(1);
13431   SDValue EltNo = N->getOperand(2);
13432   SDLoc DL(N);
13433
13434   // If the inserted element is an UNDEF, just use the input vector.
13435   if (InVal.isUndef())
13436     return InVec;
13437
13438   EVT VT = InVec.getValueType();
13439
13440   // Check that we know which element is being inserted
13441   if (!isa<ConstantSDNode>(EltNo))
13442     return SDValue();
13443   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13444
13445   // Canonicalize insert_vector_elt dag nodes.
13446   // Example:
13447   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13448   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13449   //
13450   // Do this only if the child insert_vector node has one use; also
13451   // do this only if indices are both constants and Idx1 < Idx0.
13452   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13453       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13454     unsigned OtherElt = InVec.getConstantOperandVal(2);
13455     if (Elt < OtherElt) {
13456       // Swap nodes.
13457       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13458                                   InVec.getOperand(0), InVal, EltNo);
13459       AddToWorklist(NewOp.getNode());
13460       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13461                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13462     }
13463   }
13464
13465   // If we can't generate a legal BUILD_VECTOR, exit
13466   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13467     return SDValue();
13468
13469   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13470   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13471   // vector elements.
13472   SmallVector<SDValue, 8> Ops;
13473   // Do not combine these two vectors if the output vector will not replace
13474   // the input vector.
13475   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13476     Ops.append(InVec.getNode()->op_begin(),
13477                InVec.getNode()->op_end());
13478   } else if (InVec.isUndef()) {
13479     unsigned NElts = VT.getVectorNumElements();
13480     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13481   } else {
13482     return SDValue();
13483   }
13484
13485   // Insert the element
13486   if (Elt < Ops.size()) {
13487     // All the operands of BUILD_VECTOR must have the same type;
13488     // we enforce that here.
13489     EVT OpVT = Ops[0].getValueType();
13490     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13491   }
13492
13493   // Return the new vector
13494   return DAG.getBuildVector(VT, DL, Ops);
13495 }
13496
13497 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
13498     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
13499   assert(!OriginalLoad->isVolatile());
13500
13501   EVT ResultVT = EVE->getValueType(0);
13502   EVT VecEltVT = InVecVT.getVectorElementType();
13503   unsigned Align = OriginalLoad->getAlignment();
13504   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
13505       VecEltVT.getTypeForEVT(*DAG.getContext()));
13506
13507   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13508     return SDValue();
13509
13510   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
13511     ISD::NON_EXTLOAD : ISD::EXTLOAD;
13512   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
13513     return SDValue();
13514
13515   Align = NewAlign;
13516
13517   SDValue NewPtr = OriginalLoad->getBasePtr();
13518   SDValue Offset;
13519   EVT PtrType = NewPtr.getValueType();
13520   MachinePointerInfo MPI;
13521   SDLoc DL(EVE);
13522   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13523     int Elt = ConstEltNo->getZExtValue();
13524     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
13525     Offset = DAG.getConstant(PtrOff, DL, PtrType);
13526     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
13527   } else {
13528     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
13529     Offset = DAG.getNode(
13530         ISD::MUL, DL, PtrType, Offset,
13531         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
13532     MPI = OriginalLoad->getPointerInfo();
13533   }
13534   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
13535
13536   // The replacement we need to do here is a little tricky: we need to
13537   // replace an extractelement of a load with a load.
13538   // Use ReplaceAllUsesOfValuesWith to do the replacement.
13539   // Note that this replacement assumes that the extractvalue is the only
13540   // use of the load; that's okay because we don't want to perform this
13541   // transformation in other cases anyway.
13542   SDValue Load;
13543   SDValue Chain;
13544   if (ResultVT.bitsGT(VecEltVT)) {
13545     // If the result type of vextract is wider than the load, then issue an
13546     // extending load instead.
13547     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
13548                                                   VecEltVT)
13549                                    ? ISD::ZEXTLOAD
13550                                    : ISD::EXTLOAD;
13551     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
13552                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
13553                           Align, OriginalLoad->getMemOperand()->getFlags(),
13554                           OriginalLoad->getAAInfo());
13555     Chain = Load.getValue(1);
13556   } else {
13557     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
13558                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
13559                        OriginalLoad->getAAInfo());
13560     Chain = Load.getValue(1);
13561     if (ResultVT.bitsLT(VecEltVT))
13562       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
13563     else
13564       Load = DAG.getBitcast(ResultVT, Load);
13565   }
13566   WorklistRemover DeadNodes(*this);
13567   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
13568   SDValue To[] = { Load, Chain };
13569   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
13570   // Since we're explicitly calling ReplaceAllUses, add the new node to the
13571   // worklist explicitly as well.
13572   AddToWorklist(Load.getNode());
13573   AddUsersToWorklist(Load.getNode()); // Add users too
13574   // Make sure to revisit this node to clean it up; it will usually be dead.
13575   AddToWorklist(EVE);
13576   ++OpsNarrowed;
13577   return SDValue(EVE, 0);
13578 }
13579
13580 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
13581   // (vextract (scalar_to_vector val, 0) -> val
13582   SDValue InVec = N->getOperand(0);
13583   EVT VT = InVec.getValueType();
13584   EVT NVT = N->getValueType(0);
13585
13586   if (InVec.isUndef())
13587     return DAG.getUNDEF(NVT);
13588
13589   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
13590     // Check if the result type doesn't match the inserted element type. A
13591     // SCALAR_TO_VECTOR may truncate the inserted element and the
13592     // EXTRACT_VECTOR_ELT may widen the extracted vector.
13593     SDValue InOp = InVec.getOperand(0);
13594     if (InOp.getValueType() != NVT) {
13595       assert(InOp.getValueType().isInteger() && NVT.isInteger());
13596       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
13597     }
13598     return InOp;
13599   }
13600
13601   SDValue EltNo = N->getOperand(1);
13602   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
13603
13604   // extract_vector_elt (build_vector x, y), 1 -> y
13605   if (ConstEltNo &&
13606       InVec.getOpcode() == ISD::BUILD_VECTOR &&
13607       TLI.isTypeLegal(VT) &&
13608       (InVec.hasOneUse() ||
13609        TLI.aggressivelyPreferBuildVectorSources(VT))) {
13610     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
13611     EVT InEltVT = Elt.getValueType();
13612
13613     // Sometimes build_vector's scalar input types do not match result type.
13614     if (NVT == InEltVT)
13615       return Elt;
13616
13617     // TODO: It may be useful to truncate if free if the build_vector implicitly
13618     // converts.
13619   }
13620
13621   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
13622   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
13623       ConstEltNo->isNullValue() && VT.isInteger()) {
13624     SDValue BCSrc = InVec.getOperand(0);
13625     if (BCSrc.getValueType().isScalarInteger())
13626       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
13627   }
13628
13629   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
13630   //
13631   // This only really matters if the index is non-constant since other combines
13632   // on the constant elements already work.
13633   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
13634       EltNo == InVec.getOperand(2)) {
13635     SDValue Elt = InVec.getOperand(1);
13636     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
13637   }
13638
13639   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
13640   // We only perform this optimization before the op legalization phase because
13641   // we may introduce new vector instructions which are not backed by TD
13642   // patterns. For example on AVX, extracting elements from a wide vector
13643   // without using extract_subvector. However, if we can find an underlying
13644   // scalar value, then we can always use that.
13645   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
13646     int NumElem = VT.getVectorNumElements();
13647     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
13648     // Find the new index to extract from.
13649     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
13650
13651     // Extracting an undef index is undef.
13652     if (OrigElt == -1)
13653       return DAG.getUNDEF(NVT);
13654
13655     // Select the right vector half to extract from.
13656     SDValue SVInVec;
13657     if (OrigElt < NumElem) {
13658       SVInVec = InVec->getOperand(0);
13659     } else {
13660       SVInVec = InVec->getOperand(1);
13661       OrigElt -= NumElem;
13662     }
13663
13664     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
13665       SDValue InOp = SVInVec.getOperand(OrigElt);
13666       if (InOp.getValueType() != NVT) {
13667         assert(InOp.getValueType().isInteger() && NVT.isInteger());
13668         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
13669       }
13670
13671       return InOp;
13672     }
13673
13674     // FIXME: We should handle recursing on other vector shuffles and
13675     // scalar_to_vector here as well.
13676
13677     if (!LegalOperations) {
13678       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13679       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
13680                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
13681     }
13682   }
13683
13684   bool BCNumEltsChanged = false;
13685   EVT ExtVT = VT.getVectorElementType();
13686   EVT LVT = ExtVT;
13687
13688   // If the result of load has to be truncated, then it's not necessarily
13689   // profitable.
13690   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
13691     return SDValue();
13692
13693   if (InVec.getOpcode() == ISD::BITCAST) {
13694     // Don't duplicate a load with other uses.
13695     if (!InVec.hasOneUse())
13696       return SDValue();
13697
13698     EVT BCVT = InVec.getOperand(0).getValueType();
13699     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
13700       return SDValue();
13701     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
13702       BCNumEltsChanged = true;
13703     InVec = InVec.getOperand(0);
13704     ExtVT = BCVT.getVectorElementType();
13705   }
13706
13707   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
13708   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
13709       ISD::isNormalLoad(InVec.getNode()) &&
13710       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
13711     SDValue Index = N->getOperand(1);
13712     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
13713       if (!OrigLoad->isVolatile()) {
13714         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
13715                                                              OrigLoad);
13716       }
13717     }
13718   }
13719
13720   // Perform only after legalization to ensure build_vector / vector_shuffle
13721   // optimizations have already been done.
13722   if (!LegalOperations) return SDValue();
13723
13724   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
13725   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
13726   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
13727
13728   if (ConstEltNo) {
13729     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13730
13731     LoadSDNode *LN0 = nullptr;
13732     const ShuffleVectorSDNode *SVN = nullptr;
13733     if (ISD::isNormalLoad(InVec.getNode())) {
13734       LN0 = cast<LoadSDNode>(InVec);
13735     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13736                InVec.getOperand(0).getValueType() == ExtVT &&
13737                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
13738       // Don't duplicate a load with other uses.
13739       if (!InVec.hasOneUse())
13740         return SDValue();
13741
13742       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
13743     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
13744       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
13745       // =>
13746       // (load $addr+1*size)
13747
13748       // Don't duplicate a load with other uses.
13749       if (!InVec.hasOneUse())
13750         return SDValue();
13751
13752       // If the bit convert changed the number of elements, it is unsafe
13753       // to examine the mask.
13754       if (BCNumEltsChanged)
13755         return SDValue();
13756
13757       // Select the input vector, guarding against out of range extract vector.
13758       unsigned NumElems = VT.getVectorNumElements();
13759       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
13760       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
13761
13762       if (InVec.getOpcode() == ISD::BITCAST) {
13763         // Don't duplicate a load with other uses.
13764         if (!InVec.hasOneUse())
13765           return SDValue();
13766
13767         InVec = InVec.getOperand(0);
13768       }
13769       if (ISD::isNormalLoad(InVec.getNode())) {
13770         LN0 = cast<LoadSDNode>(InVec);
13771         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
13772         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
13773       }
13774     }
13775
13776     // Make sure we found a non-volatile load and the extractelement is
13777     // the only use.
13778     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
13779       return SDValue();
13780
13781     // If Idx was -1 above, Elt is going to be -1, so just return undef.
13782     if (Elt == -1)
13783       return DAG.getUNDEF(LVT);
13784
13785     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
13786   }
13787
13788   return SDValue();
13789 }
13790
13791 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
13792 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
13793   // We perform this optimization post type-legalization because
13794   // the type-legalizer often scalarizes integer-promoted vectors.
13795   // Performing this optimization before may create bit-casts which
13796   // will be type-legalized to complex code sequences.
13797   // We perform this optimization only before the operation legalizer because we
13798   // may introduce illegal operations.
13799   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
13800     return SDValue();
13801
13802   unsigned NumInScalars = N->getNumOperands();
13803   SDLoc DL(N);
13804   EVT VT = N->getValueType(0);
13805
13806   // Check to see if this is a BUILD_VECTOR of a bunch of values
13807   // which come from any_extend or zero_extend nodes. If so, we can create
13808   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
13809   // optimizations. We do not handle sign-extend because we can't fill the sign
13810   // using shuffles.
13811   EVT SourceType = MVT::Other;
13812   bool AllAnyExt = true;
13813
13814   for (unsigned i = 0; i != NumInScalars; ++i) {
13815     SDValue In = N->getOperand(i);
13816     // Ignore undef inputs.
13817     if (In.isUndef()) continue;
13818
13819     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
13820     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
13821
13822     // Abort if the element is not an extension.
13823     if (!ZeroExt && !AnyExt) {
13824       SourceType = MVT::Other;
13825       break;
13826     }
13827
13828     // The input is a ZeroExt or AnyExt. Check the original type.
13829     EVT InTy = In.getOperand(0).getValueType();
13830
13831     // Check that all of the widened source types are the same.
13832     if (SourceType == MVT::Other)
13833       // First time.
13834       SourceType = InTy;
13835     else if (InTy != SourceType) {
13836       // Multiple income types. Abort.
13837       SourceType = MVT::Other;
13838       break;
13839     }
13840
13841     // Check if all of the extends are ANY_EXTENDs.
13842     AllAnyExt &= AnyExt;
13843   }
13844
13845   // In order to have valid types, all of the inputs must be extended from the
13846   // same source type and all of the inputs must be any or zero extend.
13847   // Scalar sizes must be a power of two.
13848   EVT OutScalarTy = VT.getScalarType();
13849   bool ValidTypes = SourceType != MVT::Other &&
13850                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
13851                  isPowerOf2_32(SourceType.getSizeInBits());
13852
13853   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
13854   // turn into a single shuffle instruction.
13855   if (!ValidTypes)
13856     return SDValue();
13857
13858   bool isLE = DAG.getDataLayout().isLittleEndian();
13859   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
13860   assert(ElemRatio > 1 && "Invalid element size ratio");
13861   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
13862                                DAG.getConstant(0, DL, SourceType);
13863
13864   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
13865   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
13866
13867   // Populate the new build_vector
13868   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13869     SDValue Cast = N->getOperand(i);
13870     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
13871             Cast.getOpcode() == ISD::ZERO_EXTEND ||
13872             Cast.isUndef()) && "Invalid cast opcode");
13873     SDValue In;
13874     if (Cast.isUndef())
13875       In = DAG.getUNDEF(SourceType);
13876     else
13877       In = Cast->getOperand(0);
13878     unsigned Index = isLE ? (i * ElemRatio) :
13879                             (i * ElemRatio + (ElemRatio - 1));
13880
13881     assert(Index < Ops.size() && "Invalid index");
13882     Ops[Index] = In;
13883   }
13884
13885   // The type of the new BUILD_VECTOR node.
13886   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
13887   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
13888          "Invalid vector size");
13889   // Check if the new vector type is legal.
13890   if (!isTypeLegal(VecVT)) return SDValue();
13891
13892   // Make the new BUILD_VECTOR.
13893   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
13894
13895   // The new BUILD_VECTOR node has the potential to be further optimized.
13896   AddToWorklist(BV.getNode());
13897   // Bitcast to the desired type.
13898   return DAG.getBitcast(VT, BV);
13899 }
13900
13901 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
13902   EVT VT = N->getValueType(0);
13903
13904   unsigned NumInScalars = N->getNumOperands();
13905   SDLoc DL(N);
13906
13907   EVT SrcVT = MVT::Other;
13908   unsigned Opcode = ISD::DELETED_NODE;
13909   unsigned NumDefs = 0;
13910
13911   for (unsigned i = 0; i != NumInScalars; ++i) {
13912     SDValue In = N->getOperand(i);
13913     unsigned Opc = In.getOpcode();
13914
13915     if (Opc == ISD::UNDEF)
13916       continue;
13917
13918     // If all scalar values are floats and converted from integers.
13919     if (Opcode == ISD::DELETED_NODE &&
13920         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
13921       Opcode = Opc;
13922     }
13923
13924     if (Opc != Opcode)
13925       return SDValue();
13926
13927     EVT InVT = In.getOperand(0).getValueType();
13928
13929     // If all scalar values are typed differently, bail out. It's chosen to
13930     // simplify BUILD_VECTOR of integer types.
13931     if (SrcVT == MVT::Other)
13932       SrcVT = InVT;
13933     if (SrcVT != InVT)
13934       return SDValue();
13935     NumDefs++;
13936   }
13937
13938   // If the vector has just one element defined, it's not worth to fold it into
13939   // a vectorized one.
13940   if (NumDefs < 2)
13941     return SDValue();
13942
13943   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
13944          && "Should only handle conversion from integer to float.");
13945   assert(SrcVT != MVT::Other && "Cannot determine source type!");
13946
13947   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
13948
13949   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
13950     return SDValue();
13951
13952   // Just because the floating-point vector type is legal does not necessarily
13953   // mean that the corresponding integer vector type is.
13954   if (!isTypeLegal(NVT))
13955     return SDValue();
13956
13957   SmallVector<SDValue, 8> Opnds;
13958   for (unsigned i = 0; i != NumInScalars; ++i) {
13959     SDValue In = N->getOperand(i);
13960
13961     if (In.isUndef())
13962       Opnds.push_back(DAG.getUNDEF(SrcVT));
13963     else
13964       Opnds.push_back(In.getOperand(0));
13965   }
13966   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
13967   AddToWorklist(BV.getNode());
13968
13969   return DAG.getNode(Opcode, DL, VT, BV);
13970 }
13971
// Build one VECTOR_SHUFFLE that places into the result the elements of the
// BUILD_VECTOR node N that are extracted from VecIn1 and VecIn2.
//
//  - N          the BUILD_VECTOR being replaced; its operands are consulted
//               for the constant extract indices.
//  - VectorMask for each element of N, the number of the input vector it comes
//               from (values <= 0 are skipped here).
//  - VecIn1     the input vector whose number is LeftIdx.
//  - VecIn2     the input vector whose number is LeftIdx + 1; may be a null
//               SDValue when there is no second input.
//  - LeftIdx    the vector number associated with VecIn1.
//
// The inputs may have a different type than the result. This routine tries to
// reconcile the types by concatenating, splitting or widening the inputs, and
// returns a null SDValue when it cannot do so.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // With no second input, treat it as having the type of the first.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  // Mask indices that refer to the second shuffle operand start right after
  // the elements of the first one.
  unsigned Vec2Offset = InVT1.getVectorNumElements();
  unsigned NumElems = VT.getVectorNumElements();
  // Number of lanes of the shuffle we emit; grows if we must shuffle at a
  // wider type than VT.
  unsigned ShuffleNumElems = NumElems;

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      // Both inputs now live in a single VT-typed vector, so the shuffle only
      // needs one real operand. Vec2Offset (the element count of the original
      // first input) is still where the second input's elements begin.
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // The first input is exactly twice as wide as the output. Only proceed
      // if the target says extracting a VT-sized subvector is cheap.
      if (!TLI.isExtractSubvectorCheap(VT, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // The shuffle is performed at the (double-width) input type; the low
        // half is extracted at the end.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip elements that do not come from an input vector (VectorMask <= 0).
    if (VectorMask[i] <= 0)
      continue;

    // Operand i is an extract with a constant index; fetch that index.
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      // Element comes from the first input.
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      // Element comes from the second input.
      Mask[i] = Vec2Offset + ExtIndex;
    }
    // Elements belonging to other input vectors stay undef in this shuffle.
  }

  // The type of the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If we shuffled at a wider type, the result lives in the low lanes.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
14079
14080 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14081 // operations. If the types of the vectors we're extracting from allow it,
14082 // turn this into a vector_shuffle node.
14083 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
14084   SDLoc DL(N);
14085   EVT VT = N->getValueType(0);
14086
14087   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
14088   if (!isTypeLegal(VT))
14089     return SDValue();
14090
14091   // May only combine to shuffle after legalize if shuffle is legal.
14092   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
14093     return SDValue();
14094
14095   bool UsesZeroVector = false;
14096   unsigned NumElems = N->getNumOperands();
14097
14098   // Record, for each element of the newly built vector, which input vector
14099   // that element comes from. -1 stands for undef, 0 for the zero vector,
14100   // and positive values for the input vectors.
14101   // VectorMask maps each element to its vector number, and VecIn maps vector
14102   // numbers to their initial SDValues.
14103
14104   SmallVector<int, 8> VectorMask(NumElems, -1);
14105   SmallVector<SDValue, 8> VecIn;
14106   VecIn.push_back(SDValue());
14107
14108   for (unsigned i = 0; i != NumElems; ++i) {
14109     SDValue Op = N->getOperand(i);
14110
14111     if (Op.isUndef())
14112       continue;
14113
14114     // See if we can use a blend with a zero vector.
14115     // TODO: Should we generalize this to a blend with an arbitrary constant
14116     // vector?
14117     if (isNullConstant(Op) || isNullFPConstant(Op)) {
14118       UsesZeroVector = true;
14119       VectorMask[i] = 0;
14120       continue;
14121     }
14122
14123     // Not an undef or zero. If the input is something other than an
14124     // EXTRACT_VECTOR_ELT with a constant index, bail out.
14125     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14126         !isa<ConstantSDNode>(Op.getOperand(1)))
14127       return SDValue();
14128
14129     SDValue ExtractedFromVec = Op.getOperand(0);
14130
14131     // All inputs must have the same element type as the output.
14132     if (VT.getVectorElementType() !=
14133         ExtractedFromVec.getValueType().getVectorElementType())
14134       return SDValue();
14135
14136     // Have we seen this input vector before?
14137     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
14138     // a map back from SDValues to numbers isn't worth it.
14139     unsigned Idx = std::distance(
14140         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
14141     if (Idx == VecIn.size())
14142       VecIn.push_back(ExtractedFromVec);
14143
14144     VectorMask[i] = Idx;
14145   }
14146
14147   // If we didn't find at least one input vector, bail out.
14148   if (VecIn.size() < 2)
14149     return SDValue();
14150
14151   // TODO: We want to sort the vectors by descending length, so that adjacent
14152   // pairs have similar length, and the longer vector is always first in the
14153   // pair.
14154
14155   // TODO: Should this fire if some of the input vectors has illegal type (like
14156   // it does now), or should we let legalization run its course first?
14157
14158   // Shuffle phase:
14159   // Take pairs of vectors, and shuffle them so that the result has elements
14160   // from these vectors in the correct places.
14161   // For example, given:
14162   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
14163   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
14164   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
14165   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
14166   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
14167   // We will generate:
14168   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
14169   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
14170   SmallVector<SDValue, 4> Shuffles;
14171   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
14172     unsigned LeftIdx = 2 * In + 1;
14173     SDValue VecLeft = VecIn[LeftIdx];
14174     SDValue VecRight =
14175         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
14176
14177     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
14178                                                 VecRight, LeftIdx))
14179       Shuffles.push_back(Shuffle);
14180     else
14181       return SDValue();
14182   }
14183
14184   // If we need the zero vector as an "ingredient" in the blend tree, add it
14185   // to the list of shuffles.
14186   if (UsesZeroVector)
14187     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
14188                                       : DAG.getConstantFP(0.0, DL, VT));
14189
14190   // If we only have one shuffle, we're done.
14191   if (Shuffles.size() == 1)
14192     return Shuffles[0];
14193
14194   // Update the vector mask to point to the post-shuffle vectors.
14195   for (int &Vec : VectorMask)
14196     if (Vec == 0)
14197       Vec = Shuffles.size() - 1;
14198     else
14199       Vec = (Vec - 1) / 2;
14200
14201   // More than one shuffle. Generate a binary tree of blends, e.g. if from
14202   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
14203   // generate:
14204   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
14205   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
14206   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
14207   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
14208   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
14209   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
14210   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
14211
14212   // Make sure the initial size of the shuffle list is even.
14213   if (Shuffles.size() % 2)
14214     Shuffles.push_back(DAG.getUNDEF(VT));
14215
14216   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
14217     if (CurSize % 2) {
14218       Shuffles[CurSize] = DAG.getUNDEF(VT);
14219       CurSize++;
14220     }
14221     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
14222       int Left = 2 * In;
14223       int Right = 2 * In + 1;
14224       SmallVector<int, 8> Mask(NumElems, -1);
14225       for (unsigned i = 0; i != NumElems; ++i) {
14226         if (VectorMask[i] == Left) {
14227           Mask[i] = i;
14228           VectorMask[i] = In;
14229         } else if (VectorMask[i] == Right) {
14230           Mask[i] = i + NumElems;
14231           VectorMask[i] = In;
14232         }
14233       }
14234
14235       Shuffles[In] =
14236           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
14237     }
14238   }
14239
14240   return Shuffles[0];
14241 }
14242
14243 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
14244   EVT VT = N->getValueType(0);
14245
14246   // A vector built entirely of undefs is undef.
14247   if (ISD::allOperandsUndef(N))
14248     return DAG.getUNDEF(VT);
14249
14250   // Check if we can express BUILD VECTOR via subvector extract.
14251   if (!LegalTypes && (N->getNumOperands() > 1)) {
14252     SDValue Op0 = N->getOperand(0);
14253     auto checkElem = [&](SDValue Op) -> uint64_t {
14254       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
14255           (Op0.getOperand(0) == Op.getOperand(0)))
14256         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
14257           return CNode->getZExtValue();
14258       return -1;
14259     };
14260
14261     int Offset = checkElem(Op0);
14262     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
14263       if (Offset + i != checkElem(N->getOperand(i))) {
14264         Offset = -1;
14265         break;
14266       }
14267     }
14268
14269     if ((Offset == 0) &&
14270         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
14271       return Op0.getOperand(0);
14272     if ((Offset != -1) &&
14273         ((Offset % N->getValueType(0).getVectorNumElements()) ==
14274          0)) // IDX must be multiple of output size.
14275       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
14276                          Op0.getOperand(0), Op0.getOperand(1));
14277   }
14278
14279   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
14280     return V;
14281
14282   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
14283     return V;
14284
14285   if (SDValue V = reduceBuildVecToShuffle(N))
14286     return V;
14287
14288   return SDValue();
14289 }
14290
14291 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14292   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14293   EVT OpVT = N->getOperand(0).getValueType();
14294
14295   // If the operands are legal vectors, leave them alone.
14296   if (TLI.isTypeLegal(OpVT))
14297     return SDValue();
14298
14299   SDLoc DL(N);
14300   EVT VT = N->getValueType(0);
14301   SmallVector<SDValue, 8> Ops;
14302
14303   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14304   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14305
14306   // Keep track of what we encounter.
14307   bool AnyInteger = false;
14308   bool AnyFP = false;
14309   for (const SDValue &Op : N->ops()) {
14310     if (ISD::BITCAST == Op.getOpcode() &&
14311         !Op.getOperand(0).getValueType().isVector())
14312       Ops.push_back(Op.getOperand(0));
14313     else if (ISD::UNDEF == Op.getOpcode())
14314       Ops.push_back(ScalarUndef);
14315     else
14316       return SDValue();
14317
14318     // Note whether we encounter an integer or floating point scalar.
14319     // If it's neither, bail out, it could be something weird like x86mmx.
14320     EVT LastOpVT = Ops.back().getValueType();
14321     if (LastOpVT.isFloatingPoint())
14322       AnyFP = true;
14323     else if (LastOpVT.isInteger())
14324       AnyInteger = true;
14325     else
14326       return SDValue();
14327   }
14328
14329   // If any of the operands is a floating point scalar bitcast to a vector,
14330   // use floating point types throughout, and bitcast everything.
14331   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14332   if (AnyFP) {
14333     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14334     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14335     if (AnyInteger) {
14336       for (SDValue &Op : Ops) {
14337         if (Op.getValueType() == SVT)
14338           continue;
14339         if (Op.isUndef())
14340           Op = ScalarUndef;
14341         else
14342           Op = DAG.getBitcast(SVT, Op);
14343       }
14344     }
14345   }
14346
14347   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14348                                VT.getSizeInBits() / SVT.getSizeInBits());
14349   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14350 }
14351
14352 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
14353 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
14354 // most two distinct vectors the same size as the result, attempt to turn this
14355 // into a legal shuffle.
14356 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
14357   EVT VT = N->getValueType(0);
14358   EVT OpVT = N->getOperand(0).getValueType();
14359   int NumElts = VT.getVectorNumElements();
14360   int NumOpElts = OpVT.getVectorNumElements();
14361
14362   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
14363   SmallVector<int, 8> Mask;
14364
14365   for (SDValue Op : N->ops()) {
14366     // Peek through any bitcast.
14367     while (Op.getOpcode() == ISD::BITCAST)
14368       Op = Op.getOperand(0);
14369
14370     // UNDEF nodes convert to UNDEF shuffle mask values.
14371     if (Op.isUndef()) {
14372       Mask.append((unsigned)NumOpElts, -1);
14373       continue;
14374     }
14375
14376     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14377       return SDValue();
14378
14379     // What vector are we extracting the subvector from and at what index?
14380     SDValue ExtVec = Op.getOperand(0);
14381
14382     // We want the EVT of the original extraction to correctly scale the
14383     // extraction index.
14384     EVT ExtVT = ExtVec.getValueType();
14385
14386     // Peek through any bitcast.
14387     while (ExtVec.getOpcode() == ISD::BITCAST)
14388       ExtVec = ExtVec.getOperand(0);
14389
14390     // UNDEF nodes convert to UNDEF shuffle mask values.
14391     if (ExtVec.isUndef()) {
14392       Mask.append((unsigned)NumOpElts, -1);
14393       continue;
14394     }
14395
14396     if (!isa<ConstantSDNode>(Op.getOperand(1)))
14397       return SDValue();
14398     int ExtIdx = Op.getConstantOperandVal(1);
14399
14400     // Ensure that we are extracting a subvector from a vector the same
14401     // size as the result.
14402     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
14403       return SDValue();
14404
14405     // Scale the subvector index to account for any bitcast.
14406     int NumExtElts = ExtVT.getVectorNumElements();
14407     if (0 == (NumExtElts % NumElts))
14408       ExtIdx /= (NumExtElts / NumElts);
14409     else if (0 == (NumElts % NumExtElts))
14410       ExtIdx *= (NumElts / NumExtElts);
14411     else
14412       return SDValue();
14413
14414     // At most we can reference 2 inputs in the final shuffle.
14415     if (SV0.isUndef() || SV0 == ExtVec) {
14416       SV0 = ExtVec;
14417       for (int i = 0; i != NumOpElts; ++i)
14418         Mask.push_back(i + ExtIdx);
14419     } else if (SV1.isUndef() || SV1 == ExtVec) {
14420       SV1 = ExtVec;
14421       for (int i = 0; i != NumOpElts; ++i)
14422         Mask.push_back(i + ExtIdx + NumElts);
14423     } else {
14424       return SDValue();
14425     }
14426   }
14427
14428   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
14429     return SDValue();
14430
14431   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
14432                               DAG.getBitcast(VT, SV1), Mask);
14433 }
14434
14435 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
14436   // If we only have one input vector, we don't need to do any concatenation.
14437   if (N->getNumOperands() == 1)
14438     return N->getOperand(0);
14439
14440   // Check if all of the operands are undefs.
14441   EVT VT = N->getValueType(0);
14442   if (ISD::allOperandsUndef(N))
14443     return DAG.getUNDEF(VT);
14444
14445   // Optimize concat_vectors where all but the first of the vectors are undef.
14446   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
14447         return Op.isUndef();
14448       })) {
14449     SDValue In = N->getOperand(0);
14450     assert(In.getValueType().isVector() && "Must concat vectors");
14451
14452     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
14453     if (In->getOpcode() == ISD::BITCAST &&
14454         !In->getOperand(0)->getValueType(0).isVector()) {
14455       SDValue Scalar = In->getOperand(0);
14456
14457       // If the bitcast type isn't legal, it might be a trunc of a legal type;
14458       // look through the trunc so we can still do the transform:
14459       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
14460       if (Scalar->getOpcode() == ISD::TRUNCATE &&
14461           !TLI.isTypeLegal(Scalar.getValueType()) &&
14462           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
14463         Scalar = Scalar->getOperand(0);
14464
14465       EVT SclTy = Scalar->getValueType(0);
14466
14467       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
14468         return SDValue();
14469
14470       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
14471       if (VNTNumElms < 2)
14472         return SDValue();
14473
14474       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
14475       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
14476         return SDValue();
14477
14478       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
14479       return DAG.getBitcast(VT, Res);
14480     }
14481   }
14482
14483   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
14484   // We have already tested above for an UNDEF only concatenation.
14485   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
14486   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
14487   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
14488     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
14489   };
14490   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
14491     SmallVector<SDValue, 8> Opnds;
14492     EVT SVT = VT.getScalarType();
14493
14494     EVT MinVT = SVT;
14495     if (!SVT.isFloatingPoint()) {
14496       // If BUILD_VECTOR are from built from integer, they may have different
14497       // operand types. Get the smallest type and truncate all operands to it.
14498       bool FoundMinVT = false;
14499       for (const SDValue &Op : N->ops())
14500         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14501           EVT OpSVT = Op.getOperand(0)->getValueType(0);
14502           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
14503           FoundMinVT = true;
14504         }
14505       assert(FoundMinVT && "Concat vector type mismatch");
14506     }
14507
14508     for (const SDValue &Op : N->ops()) {
14509       EVT OpVT = Op.getValueType();
14510       unsigned NumElts = OpVT.getVectorNumElements();
14511
14512       if (ISD::UNDEF == Op.getOpcode())
14513         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
14514
14515       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14516         if (SVT.isFloatingPoint()) {
14517           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
14518           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
14519         } else {
14520           for (unsigned i = 0; i != NumElts; ++i)
14521             Opnds.push_back(
14522                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
14523         }
14524       }
14525     }
14526
14527     assert(VT.getVectorNumElements() == Opnds.size() &&
14528            "Concat vector type mismatch");
14529     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14530   }
14531
14532   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
14533   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
14534     return V;
14535
14536   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
14537   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
14538     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
14539       return V;
14540
14541   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
14542   // nodes often generate nop CONCAT_VECTOR nodes.
14543   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
14544   // place the incoming vectors at the exact same location.
14545   SDValue SingleSource = SDValue();
14546   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
14547
14548   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14549     SDValue Op = N->getOperand(i);
14550
14551     if (Op.isUndef())
14552       continue;
14553
14554     // Check if this is the identity extract:
14555     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14556       return SDValue();
14557
14558     // Find the single incoming vector for the extract_subvector.
14559     if (SingleSource.getNode()) {
14560       if (Op.getOperand(0) != SingleSource)
14561         return SDValue();
14562     } else {
14563       SingleSource = Op.getOperand(0);
14564
14565       // Check the source type is the same as the type of the result.
14566       // If not, this concat may extend the vector, so we can not
14567       // optimize it away.
14568       if (SingleSource.getValueType() != N->getValueType(0))
14569         return SDValue();
14570     }
14571
14572     unsigned IdentityIndex = i * PartNumElem;
14573     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
14574     // The extract index must be constant.
14575     if (!CS)
14576       return SDValue();
14577
14578     // Check that we are reading from the identity index.
14579     if (CS->getZExtValue() != IdentityIndex)
14580       return SDValue();
14581   }
14582
14583   if (SingleSource.getNode())
14584     return SingleSource;
14585
14586   return SDValue();
14587 }
14588
14589 /// If we are extracting a subvector produced by a wide binary operator with at
14590 /// at least one operand that was the result of a vector concatenation, then try
14591 /// to use the narrow vector operands directly to avoid the concatenation and
14592 /// extraction.
14593 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
14594   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
14595   // some of these bailouts with other transforms.
14596
14597   // The extract index must be a constant, so we can map it to a concat operand.
14598   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14599   if (!ExtractIndex)
14600     return SDValue();
14601
14602   // Only handle the case where we are doubling and then halving. A larger ratio
14603   // may require more than two narrow binops to replace the wide binop.
14604   EVT VT = Extract->getValueType(0);
14605   unsigned NumElems = VT.getVectorNumElements();
14606   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
14607          "Extract index is not a multiple of the vector length.");
14608   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
14609     return SDValue();
14610
14611   // We are looking for an optionally bitcasted wide vector binary operator
14612   // feeding an extract subvector.
14613   SDValue BinOp = Extract->getOperand(0);
14614   if (BinOp.getOpcode() == ISD::BITCAST)
14615     BinOp = BinOp.getOperand(0);
14616
14617   // TODO: The motivating case for this transform is an x86 AVX1 target. That
14618   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
14619   // flavors, but no other 256-bit integer support. This could be extended to
14620   // handle any binop, but that may require fixing/adding other folds to avoid
14621   // codegen regressions.
14622   unsigned BOpcode = BinOp.getOpcode();
14623   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
14624     return SDValue();
14625
14626   // The binop must be a vector type, so we can chop it in half.
14627   EVT WideBVT = BinOp.getValueType();
14628   if (!WideBVT.isVector())
14629     return SDValue();
14630
14631   // Bail out if the target does not support a narrower version of the binop.
14632   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
14633                                    WideBVT.getVectorNumElements() / 2);
14634   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14635   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
14636     return SDValue();
14637
14638   // Peek through bitcasts of the binary operator operands if needed.
14639   SDValue LHS = BinOp.getOperand(0);
14640   if (LHS.getOpcode() == ISD::BITCAST)
14641     LHS = LHS.getOperand(0);
14642
14643   SDValue RHS = BinOp.getOperand(1);
14644   if (RHS.getOpcode() == ISD::BITCAST)
14645     RHS = RHS.getOperand(0);
14646
14647   // We need at least one concatenation operation of a binop operand to make
14648   // this transform worthwhile. The concat must double the input vector sizes.
14649   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
14650   bool ConcatL =
14651       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
14652   bool ConcatR =
14653       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
14654   if (!ConcatL && !ConcatR)
14655     return SDValue();
14656
14657   // If one of the binop operands was not the result of a concat, we must
14658   // extract a half-sized operand for our new narrow binop. We can't just reuse
14659   // the original extract index operand because we may have bitcasted.
14660   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
14661   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
14662   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
14663   SDLoc DL(Extract);
14664
14665   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
14666   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
14667   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
14668   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
14669                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14670                                     BinOp.getOperand(0),
14671                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14672
14673   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
14674                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14675                                     BinOp.getOperand(1),
14676                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14677
14678   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
14679   return DAG.getBitcast(VT, NarrowBinOp);
14680 }
14681
14682 /// If we are extracting a subvector from a wide vector load, convert to a
14683 /// narrow load to eliminate the extraction:
14684 /// (extract_subvector (load wide vector)) --> (load narrow vector)
14685 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
14686   // TODO: Add support for big-endian. The offset calculation must be adjusted.
14687   if (DAG.getDataLayout().isBigEndian())
14688     return SDValue();
14689
14690   // TODO: The one-use check is overly conservative. Check the cost of the
14691   // extract instead or remove that condition entirely.
14692   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
14693   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14694   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
14695       !ExtIdx)
14696     return SDValue();
14697
14698   // The narrow load will be offset from the base address of the old load if
14699   // we are extracting from something besides index 0 (little-endian).
14700   EVT VT = Extract->getValueType(0);
14701   SDLoc DL(Extract);
14702   SDValue BaseAddr = Ld->getOperand(1);
14703   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
14704
14705   // TODO: Use "BaseIndexOffset" to make this more effective.
14706   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
14707   MachineFunction &MF = DAG.getMachineFunction();
14708   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
14709                                                    VT.getStoreSize());
14710   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
14711   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
14712   return NewLd;
14713 }
14714
14715 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
14716   EVT NVT = N->getValueType(0);
14717   SDValue V = N->getOperand(0);
14718
14719   // Extract from UNDEF is UNDEF.
14720   if (V.isUndef())
14721     return DAG.getUNDEF(NVT);
14722
14723   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
14724     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
14725       return NarrowLoad;
14726
14727   // Combine:
14728   //    (extract_subvec (concat V1, V2, ...), i)
14729   // Into:
14730   //    Vi if possible
14731   // Only operand 0 is checked as 'concat' assumes all inputs of the same
14732   // type.
14733   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
14734       isa<ConstantSDNode>(N->getOperand(1)) &&
14735       V->getOperand(0).getValueType() == NVT) {
14736     unsigned Idx = N->getConstantOperandVal(1);
14737     unsigned NumElems = NVT.getVectorNumElements();
14738     assert((Idx % NumElems) == 0 &&
14739            "IDX in concat is not a multiple of the result vector length.");
14740     return V->getOperand(Idx / NumElems);
14741   }
14742
14743   // Skip bitcasting
14744   if (V->getOpcode() == ISD::BITCAST)
14745     V = V.getOperand(0);
14746
14747   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
14748     // Handle only simple case where vector being inserted and vector
14749     // being extracted are of same size.
14750     EVT SmallVT = V->getOperand(1).getValueType();
14751     if (!NVT.bitsEq(SmallVT))
14752       return SDValue();
14753
14754     // Only handle cases where both indexes are constants.
14755     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14756     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
14757
14758     if (InsIdx && ExtIdx) {
14759       // Combine:
14760       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
14761       // Into:
14762       //    indices are equal or bit offsets are equal => V1
14763       //    otherwise => (extract_subvec V1, ExtIdx)
14764       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
14765           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
14766         return DAG.getBitcast(NVT, V->getOperand(1));
14767       return DAG.getNode(
14768           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
14769           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
14770           N->getOperand(1));
14771     }
14772   }
14773
14774   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
14775     return NarrowBOp;
14776
14777   return SDValue();
14778 }
14779
14780 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
14781                                                  SDValue V, SelectionDAG &DAG) {
14782   SDLoc DL(V);
14783   EVT VT = V.getValueType();
14784
14785   switch (V.getOpcode()) {
14786   default:
14787     return V;
14788
14789   case ISD::CONCAT_VECTORS: {
14790     EVT OpVT = V->getOperand(0).getValueType();
14791     int OpSize = OpVT.getVectorNumElements();
14792     SmallBitVector OpUsedElements(OpSize, false);
14793     bool FoundSimplification = false;
14794     SmallVector<SDValue, 4> NewOps;
14795     NewOps.reserve(V->getNumOperands());
14796     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
14797       SDValue Op = V->getOperand(i);
14798       bool OpUsed = false;
14799       for (int j = 0; j < OpSize; ++j)
14800         if (UsedElements[i * OpSize + j]) {
14801           OpUsedElements[j] = true;
14802           OpUsed = true;
14803         }
14804       NewOps.push_back(
14805           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
14806                  : DAG.getUNDEF(OpVT));
14807       FoundSimplification |= Op == NewOps.back();
14808       OpUsedElements.reset();
14809     }
14810     if (FoundSimplification)
14811       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
14812     return V;
14813   }
14814
14815   case ISD::INSERT_SUBVECTOR: {
14816     SDValue BaseV = V->getOperand(0);
14817     SDValue SubV = V->getOperand(1);
14818     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
14819     if (!IdxN)
14820       return V;
14821
14822     int SubSize = SubV.getValueType().getVectorNumElements();
14823     int Idx = IdxN->getZExtValue();
14824     bool SubVectorUsed = false;
14825     SmallBitVector SubUsedElements(SubSize, false);
14826     for (int i = 0; i < SubSize; ++i)
14827       if (UsedElements[i + Idx]) {
14828         SubVectorUsed = true;
14829         SubUsedElements[i] = true;
14830         UsedElements[i + Idx] = false;
14831       }
14832
14833     // Now recurse on both the base and sub vectors.
14834     SDValue SimplifiedSubV =
14835         SubVectorUsed
14836             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
14837             : DAG.getUNDEF(SubV.getValueType());
14838     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
14839     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
14840       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
14841                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
14842     return V;
14843   }
14844   }
14845 }
14846
14847 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
14848                                        SDValue N1, SelectionDAG &DAG) {
14849   EVT VT = SVN->getValueType(0);
14850   int NumElts = VT.getVectorNumElements();
14851   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
14852   for (int M : SVN->getMask())
14853     if (M >= 0 && M < NumElts)
14854       N0UsedElements[M] = true;
14855     else if (M >= NumElts)
14856       N1UsedElements[M - NumElts] = true;
14857
14858   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
14859   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
14860   if (S0 == N0 && S1 == N1)
14861     return SDValue();
14862
14863   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
14864 }
14865
14866 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
14867 // or turn a shuffle of a single concat into simpler shuffle then concat.
14868 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
14869   EVT VT = N->getValueType(0);
14870   unsigned NumElts = VT.getVectorNumElements();
14871
14872   SDValue N0 = N->getOperand(0);
14873   SDValue N1 = N->getOperand(1);
14874   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14875
14876   SmallVector<SDValue, 4> Ops;
14877   EVT ConcatVT = N0.getOperand(0).getValueType();
14878   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
14879   unsigned NumConcats = NumElts / NumElemsPerConcat;
14880
14881   // Special case: shuffle(concat(A,B)) can be more efficiently represented
14882   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
14883   // half vector elements.
14884   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
14885       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
14886                   SVN->getMask().end(), [](int i) { return i == -1; })) {
14887     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
14888                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
14889     N1 = DAG.getUNDEF(ConcatVT);
14890     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
14891   }
14892
14893   // Look at every vector that's inserted. We're looking for exact
14894   // subvector-sized copies from a concatenated vector
14895   for (unsigned I = 0; I != NumConcats; ++I) {
14896     // Make sure we're dealing with a copy.
14897     unsigned Begin = I * NumElemsPerConcat;
14898     bool AllUndef = true, NoUndef = true;
14899     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
14900       if (SVN->getMaskElt(J) >= 0)
14901         AllUndef = false;
14902       else
14903         NoUndef = false;
14904     }
14905
14906     if (NoUndef) {
14907       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
14908         return SDValue();
14909
14910       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
14911         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
14912           return SDValue();
14913
14914       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
14915       if (FirstElt < N0.getNumOperands())
14916         Ops.push_back(N0.getOperand(FirstElt));
14917       else
14918         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
14919
14920     } else if (AllUndef) {
14921       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
14922     } else { // Mixed with general masks and undefs, can't do optimization.
14923       return SDValue();
14924     }
14925   }
14926
14927   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
14928 }
14929
14930 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
14931 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
14932 //
14933 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
14934 // a simplification in some sense, but it isn't appropriate in general: some
14935 // BUILD_VECTORs are substantially cheaper than others. The general case
14936 // of a BUILD_VECTOR requires inserting each element individually (or
14937 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
14938 // all constants is a single constant pool load.  A BUILD_VECTOR where each
14939 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
14940 // are undef lowers to a small number of element insertions.
14941 //
14942 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
14943 // We don't fold shuffles where one side is a non-zero constant, and we don't
14944 // fold shuffles if the resulting BUILD_VECTOR would have duplicate
14945 // non-constant operands. This seems to work out reasonably well in practice.
14946 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
14947                                        SelectionDAG &DAG,
14948                                        const TargetLowering &TLI) {
14949   EVT VT = SVN->getValueType(0);
14950   unsigned NumElts = VT.getVectorNumElements();
14951   SDValue N0 = SVN->getOperand(0);
14952   SDValue N1 = SVN->getOperand(1);
14953
14954   if (!N0->hasOneUse() || !N1->hasOneUse())
14955     return SDValue();
14956   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
14957   // discussed above.
14958   if (!N1.isUndef()) {
14959     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
14960     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
14961     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
14962       return SDValue();
14963     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
14964       return SDValue();
14965   }
14966
14967   SmallVector<SDValue, 8> Ops;
14968   SmallSet<SDValue, 16> DuplicateOps;
14969   for (int M : SVN->getMask()) {
14970     SDValue Op = DAG.getUNDEF(VT.getScalarType());
14971     if (M >= 0) {
14972       int Idx = M < (int)NumElts ? M : M - NumElts;
14973       SDValue &S = (M < (int)NumElts ? N0 : N1);
14974       if (S.getOpcode() == ISD::BUILD_VECTOR) {
14975         Op = S.getOperand(Idx);
14976       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
14977         if (Idx == 0)
14978           Op = S.getOperand(0);
14979       } else {
14980         // Operand can't be combined - bail out.
14981         return SDValue();
14982       }
14983     }
14984
14985     // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
14986     // fine, but it's likely to generate low-quality code if the target can't
14987     // reconstruct an appropriate shuffle.
14988     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
14989       if (!DuplicateOps.insert(Op).second)
14990         return SDValue();
14991
14992     Ops.push_back(Op);
14993   }
14994   // BUILD_VECTOR requires all inputs to be of the same type, find the
14995   // maximum type and extend them all.
14996   EVT SVT = VT.getScalarType();
14997   if (SVT.isInteger())
14998     for (SDValue &Op : Ops)
14999       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
15000   if (SVT != VT.getScalarType())
15001     for (SDValue &Op : Ops)
15002       Op = TLI.isZExtFree(Op.getValueType(), SVT)
15003                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
15004                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
15005   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
15006 }
15007
15008 // Match shuffles that can be converted to any_vector_extend_in_reg.
15009 // This is often generated during legalization.
15010 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15011 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15012 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15013                                             SelectionDAG &DAG,
15014                                             const TargetLowering &TLI,
15015                                             bool LegalOperations) {
15016   EVT VT = SVN->getValueType(0);
15017   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15018
15019   // TODO Add support for big-endian when we have a test case.
15020   if (!VT.isInteger() || IsBigEndian)
15021     return SDValue();
15022
15023   unsigned NumElts = VT.getVectorNumElements();
15024   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15025   ArrayRef<int> Mask = SVN->getMask();
15026   SDValue N0 = SVN->getOperand(0);
15027
15028   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15029   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15030     for (unsigned i = 0; i != NumElts; ++i) {
15031       if (Mask[i] < 0)
15032         continue;
15033       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15034         continue;
15035       return false;
15036     }
15037     return true;
15038   };
15039
15040   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15041   // power-of-2 extensions as they are the most likely.
15042   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15043     if (!isAnyExtend(Scale))
15044       continue;
15045
15046     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15047     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15048     if (!LegalOperations ||
15049         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15050       return DAG.getBitcast(VT,
15051                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15052   }
15053
15054   return SDValue();
15055 }
15056
15057 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15058 // each source element of a large type into the lowest elements of a smaller
15059 // destination type. This is often generated during legalization.
15060 // If the source node itself was a '*_extend_vector_inreg' node then we should
15061 // then be able to remove it.
15062 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15063                                         SelectionDAG &DAG) {
15064   EVT VT = SVN->getValueType(0);
15065   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15066
15067   // TODO Add support for big-endian when we have a test case.
15068   if (!VT.isInteger() || IsBigEndian)
15069     return SDValue();
15070
15071   SDValue N0 = SVN->getOperand(0);
15072   while (N0.getOpcode() == ISD::BITCAST)
15073     N0 = N0.getOperand(0);
15074
15075   unsigned Opcode = N0.getOpcode();
15076   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15077       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15078       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15079     return SDValue();
15080
15081   SDValue N00 = N0.getOperand(0);
15082   ArrayRef<int> Mask = SVN->getMask();
15083   unsigned NumElts = VT.getVectorNumElements();
15084   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15085   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15086
15087   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
15088   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15089   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15090   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15091     for (unsigned i = 0; i != NumElts; ++i) {
15092       if (Mask[i] < 0)
15093         continue;
15094       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15095         continue;
15096       return false;
15097     }
15098     return true;
15099   };
15100
15101   // At the moment we just handle the case where we've truncated back to the
15102   // same size as before the extension.
15103   // TODO: handle more extension/truncation cases as cases arise.
15104   if (EltSizeInBits != ExtSrcSizeInBits)
15105     return SDValue();
15106
15107   // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
15108   // power-of-2 truncations as they are the most likely.
15109   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
15110     if (isTruncate(Scale))
15111       return DAG.getBitcast(VT, N00);
15112
15113   return SDValue();
15114 }
15115
15116 // Combine shuffles of splat-shuffles of the form:
15117 // shuffle (shuffle V, undef, splat-mask), undef, M
15118 // If splat-mask contains undef elements, we need to be careful about
15119 // introducing undef's in the folded mask which are not the result of composing
15120 // the masks of the shuffles.
15121 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15122                                      ShuffleVectorSDNode *Splat,
15123                                      SelectionDAG &DAG) {
15124   ArrayRef<int> SplatMask = Splat->getMask();
15125   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15126
15127   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15128   // every undef mask element in the splat-shuffle has a corresponding undef
15129   // element in the user-shuffle's mask or if the composition of mask elements
15130   // would result in undef.
15131   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15132   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15133   //   In this case it is not legal to simplify to the splat-shuffle because we
15134   //   may be exposing the users of the shuffle an undef element at index 1
15135   //   which was not there before the combine.
15136   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15137   //   In this case the composition of masks yields SplatMask, so it's ok to
15138   //   simplify to the splat-shuffle.
15139   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15140   //   In this case the composed mask includes all undef elements of SplatMask
15141   //   and in addition sets element zero to undef. It is safe to simplify to
15142   //   the splat-shuffle.
15143   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15144                                        ArrayRef<int> SplatMask) {
15145     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15146       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15147           SplatMask[UserMask[i]] != -1)
15148         return false;
15149     return true;
15150   };
15151   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15152     return SDValue(Splat, 0);
15153
15154   // Create a new shuffle with a mask that is composed of the two shuffles'
15155   // masks.
15156   SmallVector<int, 32> NewMask;
15157   for (int Idx : UserMask)
15158     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15159
15160   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15161                               Splat->getOperand(0), Splat->getOperand(1),
15162                               NewMask);
15163 }
15164
15165 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
15166   EVT VT = N->getValueType(0);
15167   unsigned NumElts = VT.getVectorNumElements();
15168
15169   SDValue N0 = N->getOperand(0);
15170   SDValue N1 = N->getOperand(1);
15171
15172   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
15173
15174   // Canonicalize shuffle undef, undef -> undef
15175   if (N0.isUndef() && N1.isUndef())
15176     return DAG.getUNDEF(VT);
15177
15178   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15179
15180   // Canonicalize shuffle v, v -> v, undef
15181   if (N0 == N1) {
15182     SmallVector<int, 8> NewMask;
15183     for (unsigned i = 0; i != NumElts; ++i) {
15184       int Idx = SVN->getMaskElt(i);
15185       if (Idx >= (int)NumElts) Idx -= NumElts;
15186       NewMask.push_back(Idx);
15187     }
15188     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
15189   }
15190
15191   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
15192   if (N0.isUndef())
15193     return DAG.getCommutedVectorShuffle(*SVN);
15194
15195   // Remove references to rhs if it is undef
15196   if (N1.isUndef()) {
15197     bool Changed = false;
15198     SmallVector<int, 8> NewMask;
15199     for (unsigned i = 0; i != NumElts; ++i) {
15200       int Idx = SVN->getMaskElt(i);
15201       if (Idx >= (int)NumElts) {
15202         Idx = -1;
15203         Changed = true;
15204       }
15205       NewMask.push_back(Idx);
15206     }
15207     if (Changed)
15208       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
15209   }
15210
15211   // A shuffle of a single vector that is a splat can always be folded.
15212   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
15213     if (N1->isUndef() && N0Shuf->isSplat())
15214       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
15215
15216   // If it is a splat, check if the argument vector is another splat or a
15217   // build_vector.
15218   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
15219     SDNode *V = N0.getNode();
15220
15221     // If this is a bit convert that changes the element type of the vector but
15222     // not the number of vector elements, look through it.  Be careful not to
15223     // look though conversions that change things like v4f32 to v2f64.
15224     if (V->getOpcode() == ISD::BITCAST) {
15225       SDValue ConvInput = V->getOperand(0);
15226       if (ConvInput.getValueType().isVector() &&
15227           ConvInput.getValueType().getVectorNumElements() == NumElts)
15228         V = ConvInput.getNode();
15229     }
15230
15231     if (V->getOpcode() == ISD::BUILD_VECTOR) {
15232       assert(V->getNumOperands() == NumElts &&
15233              "BUILD_VECTOR has wrong number of operands");
15234       SDValue Base;
15235       bool AllSame = true;
15236       for (unsigned i = 0; i != NumElts; ++i) {
15237         if (!V->getOperand(i).isUndef()) {
15238           Base = V->getOperand(i);
15239           break;
15240         }
15241       }
15242       // Splat of <u, u, u, u>, return <u, u, u, u>
15243       if (!Base.getNode())
15244         return N0;
15245       for (unsigned i = 0; i != NumElts; ++i) {
15246         if (V->getOperand(i) != Base) {
15247           AllSame = false;
15248           break;
15249         }
15250       }
15251       // Splat of <x, x, x, x>, return <x, x, x, x>
15252       if (AllSame)
15253         return N0;
15254
15255       // Canonicalize any other splat as a build_vector.
15256       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
15257       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
15258       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
15259
15260       // We may have jumped through bitcasts, so the type of the
15261       // BUILD_VECTOR may not match the type of the shuffle.
15262       if (V->getValueType(0) != VT)
15263         NewBV = DAG.getBitcast(VT, NewBV);
15264       return NewBV;
15265     }
15266   }
15267
15268   // There are various patterns used to build up a vector from smaller vectors,
15269   // subvectors, or elements. Scan chains of these and replace unused insertions
15270   // or components with undef.
15271   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
15272     return S;
15273
15274   // Match shuffles that can be converted to any_vector_extend_in_reg.
15275   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
15276     return V;
15277
15278   // Combine "truncate_vector_in_reg" style shuffles.
15279   if (SDValue V = combineTruncationShuffle(SVN, DAG))
15280     return V;
15281
15282   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
15283       Level < AfterLegalizeVectorOps &&
15284       (N1.isUndef() ||
15285       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
15286        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
15287     if (SDValue V = partitionShuffleOfConcats(N, DAG))
15288       return V;
15289   }
15290
15291   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15292   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15293   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15294     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
15295       return Res;
15296
15297   // If this shuffle only has a single input that is a bitcasted shuffle,
15298   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
15299   // back to their original types.
15300   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15301       N1.isUndef() && Level < AfterLegalizeVectorOps &&
15302       TLI.isTypeLegal(VT)) {
15303
15304     // Peek through the bitcast only if there is one user.
15305     SDValue BC0 = N0;
15306     while (BC0.getOpcode() == ISD::BITCAST) {
15307       if (!BC0.hasOneUse())
15308         break;
15309       BC0 = BC0.getOperand(0);
15310     }
15311
15312     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
15313       if (Scale == 1)
15314         return SmallVector<int, 8>(Mask.begin(), Mask.end());
15315
15316       SmallVector<int, 8> NewMask;
15317       for (int M : Mask)
15318         for (int s = 0; s != Scale; ++s)
15319           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
15320       return NewMask;
15321     };
15322
15323     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
15324       EVT SVT = VT.getScalarType();
15325       EVT InnerVT = BC0->getValueType(0);
15326       EVT InnerSVT = InnerVT.getScalarType();
15327
15328       // Determine which shuffle works with the smaller scalar type.
15329       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
15330       EVT ScaleSVT = ScaleVT.getScalarType();
15331
15332       if (TLI.isTypeLegal(ScaleVT) &&
15333           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
15334           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
15335
15336         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15337         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15338
15339         // Scale the shuffle masks to the smaller scalar type.
15340         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
15341         SmallVector<int, 8> InnerMask =
15342             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
15343         SmallVector<int, 8> OuterMask =
15344             ScaleShuffleMask(SVN->getMask(), OuterScale);
15345
15346         // Merge the shuffle masks.
15347         SmallVector<int, 8> NewMask;
15348         for (int M : OuterMask)
15349           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
15350
15351         // Test for shuffle mask legality over both commutations.
15352         SDValue SV0 = BC0->getOperand(0);
15353         SDValue SV1 = BC0->getOperand(1);
15354         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15355         if (!LegalMask) {
15356           std::swap(SV0, SV1);
15357           ShuffleVectorSDNode::commuteMask(NewMask);
15358           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15359         }
15360
15361         if (LegalMask) {
15362           SV0 = DAG.getBitcast(ScaleVT, SV0);
15363           SV1 = DAG.getBitcast(ScaleVT, SV1);
15364           return DAG.getBitcast(
15365               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
15366         }
15367       }
15368     }
15369   }
15370
15371   // Canonicalize shuffles according to rules:
15372   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
15373   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
15374   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
15375   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
15376       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
15377       TLI.isTypeLegal(VT)) {
15378     // The incoming shuffle must be of the same type as the result of the
15379     // current shuffle.
15380     assert(N1->getOperand(0).getValueType() == VT &&
15381            "Shuffle types don't match");
15382
15383     SDValue SV0 = N1->getOperand(0);
15384     SDValue SV1 = N1->getOperand(1);
15385     bool HasSameOp0 = N0 == SV0;
15386     bool IsSV1Undef = SV1.isUndef();
15387     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
15388       // Commute the operands of this shuffle so that next rule
15389       // will trigger.
15390       return DAG.getCommutedVectorShuffle(*SVN);
15391   }
15392
15393   // Try to fold according to rules:
15394   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15395   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15396   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15397   // Don't try to fold shuffles with illegal type.
15398   // Only fold if this shuffle is the only user of the other shuffle.
15399   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
15400       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
15401     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
15402
15403     // Don't try to fold splats; they're likely to simplify somehow, or they
15404     // might be free.
15405     if (OtherSV->isSplat())
15406       return SDValue();
15407
15408     // The incoming shuffle must be of the same type as the result of the
15409     // current shuffle.
15410     assert(OtherSV->getOperand(0).getValueType() == VT &&
15411            "Shuffle types don't match");
15412
15413     SDValue SV0, SV1;
15414     SmallVector<int, 4> Mask;
15415     // Compute the combined shuffle mask for a shuffle with SV0 as the first
15416     // operand, and SV1 as the second operand.
15417     for (unsigned i = 0; i != NumElts; ++i) {
15418       int Idx = SVN->getMaskElt(i);
15419       if (Idx < 0) {
15420         // Propagate Undef.
15421         Mask.push_back(Idx);
15422         continue;
15423       }
15424
15425       SDValue CurrentVec;
15426       if (Idx < (int)NumElts) {
15427         // This shuffle index refers to the inner shuffle N0. Lookup the inner
15428         // shuffle mask to identify which vector is actually referenced.
15429         Idx = OtherSV->getMaskElt(Idx);
15430         if (Idx < 0) {
15431           // Propagate Undef.
15432           Mask.push_back(Idx);
15433           continue;
15434         }
15435
15436         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
15437                                            : OtherSV->getOperand(1);
15438       } else {
15439         // This shuffle index references an element within N1.
15440         CurrentVec = N1;
15441       }
15442
15443       // Simple case where 'CurrentVec' is UNDEF.
15444       if (CurrentVec.isUndef()) {
15445         Mask.push_back(-1);
15446         continue;
15447       }
15448
15449       // Canonicalize the shuffle index. We don't know yet if CurrentVec
15450       // will be the first or second operand of the combined shuffle.
15451       Idx = Idx % NumElts;
15452       if (!SV0.getNode() || SV0 == CurrentVec) {
15453         // Ok. CurrentVec is the left hand side.
15454         // Update the mask accordingly.
15455         SV0 = CurrentVec;
15456         Mask.push_back(Idx);
15457         continue;
15458       }
15459
15460       // Bail out if we cannot convert the shuffle pair into a single shuffle.
15461       if (SV1.getNode() && SV1 != CurrentVec)
15462         return SDValue();
15463
15464       // Ok. CurrentVec is the right hand side.
15465       // Update the mask accordingly.
15466       SV1 = CurrentVec;
15467       Mask.push_back(Idx + NumElts);
15468     }
15469
15470     // Check if all indices in Mask are Undef. In case, propagate Undef.
15471     bool isUndefMask = true;
15472     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
15473       isUndefMask &= Mask[i] < 0;
15474
15475     if (isUndefMask)
15476       return DAG.getUNDEF(VT);
15477
15478     if (!SV0.getNode())
15479       SV0 = DAG.getUNDEF(VT);
15480     if (!SV1.getNode())
15481       SV1 = DAG.getUNDEF(VT);
15482
15483     // Avoid introducing shuffles with illegal mask.
15484     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
15485       ShuffleVectorSDNode::commuteMask(Mask);
15486
15487       if (!TLI.isShuffleMaskLegal(Mask, VT))
15488         return SDValue();
15489
15490       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
15491       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
15492       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
15493       std::swap(SV0, SV1);
15494     }
15495
15496     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15497     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15498     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15499     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
15500   }
15501
15502   return SDValue();
15503 }
15504
15505 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
        // Combine hook for SCALAR_TO_VECTOR nodes. Returns the replacement value,
        // or an empty SDValue when no fold applies.
15506   SDValue InVal = N->getOperand(0);
15507   EVT VT = N->getValueType(0);
15508
15509   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
15510   // with a VECTOR_SHUFFLE.
15511   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15512     SDValue InVec = InVal->getOperand(0);
15513     SDValue EltNo = InVal->getOperand(1);
15514
15515     // FIXME: We could support implicit truncation if the shuffle can be
15516     // scaled to a smaller vector scalar type.
          // Only a constant extract index is handled, and the source vector must
          // have exactly the result type with no scalar extension or truncation.
15517     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
15518     if (C0 && VT == InVec.getValueType() &&
15519         VT.getScalarType() == InVal.getValueType()) {
            // Lane 0 takes the extracted element; every other lane is undef (-1).
15520       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
15521       int Elt = C0->getZExtValue();
15522       NewMask[0] = Elt;
15523
            // Only emit the shuffle if the target reports this mask as legal.
15524       if (TLI.isShuffleMaskLegal(NewMask, VT))
15525         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
15526                                     NewMask);
15527     }
15528   }
15529
15530   return SDValue();
15531 }
15532
15533 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
15534   EVT VT = N->getValueType(0);
15535   SDValue N0 = N->getOperand(0);
15536   SDValue N1 = N->getOperand(1);
15537   SDValue N2 = N->getOperand(2);
15538
15539   // If inserting an UNDEF, just return the original vector.
15540   if (N1.isUndef())
15541     return N0;
15542
15543   // If this is an insert of an extracted vector into an undef vector, we can
15544   // just use the input to the extract.
15545   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15546       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
15547     return N1.getOperand(0);
15548
15549   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
15550   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
15551   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
15552   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
15553       N0.getOperand(1).getValueType() == N1.getValueType() &&
15554       N0.getOperand(2) == N2)
15555     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
15556                        N1, N2);
15557
15558   if (!isa<ConstantSDNode>(N2))
15559     return SDValue();
15560
15561   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
15562
15563   // Canonicalize insert_subvector dag nodes.
15564   // Example:
15565   // (insert_subvector (insert_subvector A, Idx0), Idx1)
15566   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
15567   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
15568       N1.getValueType() == N0.getOperand(1).getValueType() &&
15569       isa<ConstantSDNode>(N0.getOperand(2))) {
15570     unsigned OtherIdx = N0.getConstantOperandVal(2);
15571     if (InsIdx < OtherIdx) {
15572       // Swap nodes.
15573       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
15574                                   N0.getOperand(0), N1, N2);
15575       AddToWorklist(NewOp.getNode());
15576       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
15577                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
15578     }
15579   }
15580
15581   // If the input vector is a concatenation, and the insert replaces
15582   // one of the pieces, we can optimize into a single concat_vectors.
15583   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
15584       N0.getOperand(0).getValueType() == N1.getValueType()) {
15585     unsigned Factor = N1.getValueType().getVectorNumElements();
15586
15587     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
15588     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
15589
15590     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15591   }
15592
15593   return SDValue();
15594 }
15595
15596 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
        // Combine hook for FP_TO_FP16 nodes. Returns the replacement value, or an
        // empty SDValue when no fold applies.
15597   SDValue N0 = N->getOperand(0);
15598
15599   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
        // NOTE(review): the type of 'op' is not compared against N's result type
        // here -- presumably they always match; confirm.
15600   if (N0->getOpcode() == ISD::FP16_TO_FP)
15601     return N0->getOperand(0);
15602
15603   return SDValue();
15604 }
15605
15606 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
        // Combine hook for FP16_TO_FP nodes. Returns the replacement value, or an
        // empty SDValue when no fold applies.
15607   SDValue N0 = N->getOperand(0);
15608
15609   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
        // Only a non-opaque constant mask equal to exactly 0xffff is dropped.
        // NOTE(review): relies on FP16_TO_FP reading only the low 16 bits of its
        // operand, which is not visible in this function -- confirm.
15610   if (N0->getOpcode() == ISD::AND) {
15611     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
15612     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
15613       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
15614                          N0.getOperand(0));
15615     }
15616   }
15617
15618   return SDValue();
15619 }
15620
15621 /// Returns a vector_shuffle if it is able to transform an AND to a
      /// vector_shuffle
15622 /// with the destination vector and a zero vector.
15623 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
15624 ///      vector_shuffle V, Zero, <0, 5, 2, 7>
      /// Returns an empty SDValue if N is not an AND, if operations have already
      /// been legalized, or if no legal clear mask can be built.
15625 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
15626   EVT VT = N->getValueType(0);
15627   SDValue LHS = N->getOperand(0);
15628   SDValue RHS = N->getOperand(1);
15629   SDLoc DL(N);
15630
15631   // Make sure we're not running after operation legalization where it
15632   // may have custom lowered the vector shuffles.
15633   if (LegalOperations)
15634     return SDValue();
15635
15636   if (N->getOpcode() != ISD::AND)
15637     return SDValue();
15638
        // Look through a single bitcast on the mask operand; RVT below is then the
        // type of the underlying BUILD_VECTOR, which may differ from VT.
15639   if (RHS.getOpcode() == ISD::BITCAST)
15640     RHS = RHS.getOperand(0);
15641
15642   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
15643     return SDValue();
15644
15645   EVT RVT = RHS.getValueType();
15646   unsigned NumElts = RHS.getNumOperands();
15647
15648   // Attempt to create a valid clear mask, splitting the mask into
15649   // sub elements and checking to see if each is
15650   // all zeros or all ones - suitable for shuffle masking.
        // 'Split' is the number of sub elements each mask element is divided
        // into. The lambda returns the new shuffle (bitcast back to VT) or an
        // empty SDValue if this split level does not work.
15651   auto BuildClearMask = [&](int Split) {
15652     int NumSubElts = NumElts * Split;
15653     int NumSubBits = RVT.getScalarSizeInBits() / Split;
15654
15655     SmallVector<int, 8> Indices;
15656     for (int i = 0; i != NumSubElts; ++i) {
15657       int EltIdx = i / Split;
15658       int SubIdx = i % Split;
15659       SDValue Elt = RHS.getOperand(EltIdx);
            // An undef mask element makes all of its sub elements undef lanes.
15660       if (Elt.isUndef()) {
15661         Indices.push_back(-1);
15662         continue;
15663       }
15664
            // Only integer and FP constants are accepted; FP constants are
            // reinterpreted as their raw bit pattern.
15665       APInt Bits;
15666       if (isa<ConstantSDNode>(Elt))
15667         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
15668       else if (isa<ConstantFPSDNode>(Elt))
15669         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
15670       else
15671         return SDValue();
15672
15673       // Extract the sub element from the constant bit mask.
            // On big-endian targets sub element 0 comes from the most significant
            // bits, so the shift amount is mirrored.
15674       if (DAG.getDataLayout().isBigEndian()) {
15675         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
15676       } else {
15677         Bits.lshrInPlace(SubIdx * NumSubBits);
15678       }
15679
            // NOTE(review): with Split == 1 the constant is tested at its own bit
            // width, with no truncation to RVT's scalar width -- confirm the
            // BUILD_VECTOR operands cannot be wider than the element type here.
15680       if (Split > 1)
15681         Bits = Bits.trunc(NumSubBits);
15682
            // All ones keeps lane i of LHS; zero selects lane i of the second
            // (zero) shuffle operand; any other pattern cannot be a shuffle.
15683       if (Bits.isAllOnesValue())
15684         Indices.push_back(i);
15685       else if (Bits == 0)
15686         Indices.push_back(i + NumSubElts);
15687       else
15688         return SDValue();
15689     }
15690
15691     // Let's see if the target supports this vector_shuffle.
15692     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
15693     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
15694     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
15695       return SDValue();
15696
          // Shuffle in the integer ClearVT type, then bitcast back to VT.
15697     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
15698     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
15699                                                    DAG.getBitcast(ClearVT, LHS),
15700                                                    Zero, Indices));
15701   };
15702
15703   // Determine maximum split level (byte level masking).
15704   int MaxSplit = 1;
15705   if (RVT.getScalarSizeInBits() % 8 == 0)
15706     MaxSplit = RVT.getScalarSizeInBits() / 8;
15707
        // Try the coarsest split first and return the first one that succeeds;
        // splits that do not evenly divide the element width are skipped.
15708   for (int Split = 1; Split <= MaxSplit; ++Split)
15709     if (RVT.getScalarSizeInBits() % Split == 0)
15710       if (SDValue S = BuildClearMask(Split))
15711         return S;
15712
15713   return SDValue();
15714 }
15715
15716 /// Visit a binary vector operation, like ADD.
      /// Tries constant folding, then the AND-to-clear-mask shuffle transform,
      /// then hoisting a shuffle shared by both operands above the operation.
      /// Returns the replacement value, or an empty SDValue if nothing applied.
15717 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
15718   assert(N->getValueType(0).isVector() &&
15719          "SimplifyVBinOp only works on vectors!");
15720
15721   SDValue LHS = N->getOperand(0);
15722   SDValue RHS = N->getOperand(1);
15723   SDValue Ops[] = {LHS, RHS};
15724
15725   // See if we can constant fold the vector operation.
15726   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
15727           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
15728     return Fold;
15729
15730   // Try to convert a constant mask AND into a shuffle clear mask.
15731   if (SDValue Shuffle = XformToShuffleWithZero(N))
15732     return Shuffle;
15733
15734   // Type legalization might introduce new shuffles in the DAG.
15735   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
15736   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
        // Both shuffles must be single-use, have an undef second operand, and
        // (checked below) carry identical masks.
15737   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
15738       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
15739       LHS.getOperand(1).isUndef() &&
15740       RHS.getOperand(1).isUndef()) {
15741     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
15742     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
15743
15744     if (SVN0->getMask().equals(SVN1->getMask())) {
15745       EVT VT = N->getValueType(0);
            // Reuse the existing undef operand of the LHS shuffle.
15746       SDValue UndefVector = LHS.getOperand(1);
            // The new operation keeps N's opcode and node flags.
15747       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
15748                                      LHS.getOperand(0), RHS.getOperand(0),
15749                                      N->getFlags());
            // Queue N's users for another visit.
15750       AddUsersToWorklist(N);
15751       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
15752                                   SVN0->getMask());
15753     }
15754   }
15755
15756   return SDValue();
15757 }
15758
15759 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
15760                                     SDValue N2) {
        // Simplify a select whose condition N0 is a SETCC, with N1 / N2 as the
        // true / false values, by delegating to SimplifySelectCC. Returns the
        // simplified value, or an empty SDValue if nothing was simplified.
15761   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
15762
        // Unpack the SETCC: operands 0 and 1 are compared, operand 2 is the
        // condition code.
15763   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
15764                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15765
15766   // If we got a simplified select_cc node back from SimplifySelectCC, then
15767   // break it down into a new SETCC node, and a new SELECT node, and then return
15768   // the SELECT node, since we were called with a SELECT node.
15769   if (SCC.getNode()) {
15770     // Check to see if we got a select_cc back (to turn into setcc/select).
15771     // Otherwise, just return whatever node we got back, like fabs.
15772     if (SCC.getOpcode() == ISD::SELECT_CC) {
            // The new SETCC keeps N0's result type and takes the SELECT_CC's
            // compare operands (0, 1) and condition code (4).
15773       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
15774                                   N0.getValueType(),
15775                                   SCC.getOperand(0), SCC.getOperand(1),
15776                                   SCC.getOperand(4));
15777       AddToWorklist(SETCC.getNode());
            // SELECT_CC operands 2 and 3 are the true and false values.
15778       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
15779                            SCC.getOperand(2), SCC.getOperand(3));
15780     }
15781
15782     return SCC;
15783   }
15784   return SDValue();
15785 }
15786
15787 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
15788 /// being selected between, see if we can simplify the select.  Callers of this
15789 /// should assume that TheSelect is deleted if this returns true.  As such, they
15790 /// should return the appropriate thing (e.g. the node) back to the top-level of
15791 /// the DAG combiner loop to avoid it being looked at.
15792 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
15793                                     SDValue RHS) {
15794
15795   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15796   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
15797   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
15798     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
15799       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
15800       SDValue Sqrt = RHS;
15801       ISD::CondCode CC;
15802       SDValue CmpLHS;
15803       const ConstantFPSDNode *Zero = nullptr;
15804
15805       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
15806         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
15807         CmpLHS = TheSelect->getOperand(0);
15808         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
15809       } else {
15810         // SELECT or VSELECT
15811         SDValue Cmp = TheSelect->getOperand(0);
15812         if (Cmp.getOpcode() == ISD::SETCC) {
15813           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
15814           CmpLHS = Cmp.getOperand(0);
15815           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
15816         }
15817       }
15818       if (Zero && Zero->isZero() &&
15819           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
15820           CC == ISD::SETULT || CC == ISD::SETLT)) {
15821         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15822         CombineTo(TheSelect, Sqrt);
15823         return true;
15824       }
15825     }
15826   }
15827   // Cannot simplify select with vector condition
15828   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
15829
15830   // If this is a select from two identical things, try to pull the operation
15831   // through the select.
15832   if (LHS.getOpcode() != RHS.getOpcode() ||
15833       !LHS.hasOneUse() || !RHS.hasOneUse())
15834     return false;
15835
15836   // If this is a load and the token chain is identical, replace the select
15837   // of two loads with a load through a select of the address to load from.
15838   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
15839   // constants have been dropped into the constant pool.
15840   if (LHS.getOpcode() == ISD::LOAD) {
15841     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
15842     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
15843
15844     // Token chains must be identical.
15845     if (LHS.getOperand(0) != RHS.getOperand(0) ||
15846         // Do not let this transformation reduce the number of volatile loads.
15847         LLD->isVolatile() || RLD->isVolatile() ||
15848         // FIXME: If either is a pre/post inc/dec load,
15849         // we'd need to split out the address adjustment.
15850         LLD->isIndexed() || RLD->isIndexed() ||
15851         // If this is an EXTLOAD, the VT's must match.
15852         LLD->getMemoryVT() != RLD->getMemoryVT() ||
15853         // If this is an EXTLOAD, the kind of extension must match.
15854         (LLD->getExtensionType() != RLD->getExtensionType() &&
15855          // The only exception is if one of the extensions is anyext.
15856          LLD->getExtensionType() != ISD::EXTLOAD &&
15857          RLD->getExtensionType() != ISD::EXTLOAD) ||
15858         // FIXME: this discards src value information.  This is
15859         // over-conservative. It would be beneficial to be able to remember
15860         // both potential memory locations.  Since we are discarding
15861         // src value info, don't do the transformation if the memory
15862         // locations are not in the default address space.
15863         LLD->getPointerInfo().getAddrSpace() != 0 ||
15864         RLD->getPointerInfo().getAddrSpace() != 0 ||
15865         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
15866                                       LLD->getBasePtr().getValueType()))
15867       return false;
15868
15869     // Check that the select condition doesn't reach either load.  If so,
15870     // folding this will induce a cycle into the DAG.  If not, this is safe to
15871     // xform, so create a select of the addresses.
15872     SDValue Addr;
15873     if (TheSelect->getOpcode() == ISD::SELECT) {
15874       SDNode *CondNode = TheSelect->getOperand(0).getNode();
15875       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
15876           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
15877         return false;
15878       // The loads must not depend on one another.
15879       if (LLD->isPredecessorOf(RLD) ||
15880           RLD->isPredecessorOf(LLD))
15881         return false;
15882       Addr = DAG.getSelect(SDLoc(TheSelect),
15883                            LLD->getBasePtr().getValueType(),
15884                            TheSelect->getOperand(0), LLD->getBasePtr(),
15885                            RLD->getBasePtr());
15886     } else {  // Otherwise SELECT_CC
15887       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
15888       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
15889
15890       if ((LLD->hasAnyUseOfValue(1) &&
15891            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
15892           (RLD->hasAnyUseOfValue(1) &&
15893            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
15894         return false;
15895
15896       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
15897                          LLD->getBasePtr().getValueType(),
15898                          TheSelect->getOperand(0),
15899                          TheSelect->getOperand(1),
15900                          LLD->getBasePtr(), RLD->getBasePtr(),
15901                          TheSelect->getOperand(4));
15902     }
15903
15904     SDValue Load;
15905     // It is safe to replace the two loads if they have different alignments,
15906     // but the new load must be the minimum (most restrictive) alignment of the
15907     // inputs.
15908     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
15909     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
15910     if (!RLD->isInvariant())
15911       MMOFlags &= ~MachineMemOperand::MOInvariant;
15912     if (!RLD->isDereferenceable())
15913       MMOFlags &= ~MachineMemOperand::MODereferenceable;
15914     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
15915       // FIXME: Discards pointer and AA info.
15916       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
15917                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
15918                          MMOFlags);
15919     } else {
15920       // FIXME: Discards pointer and AA info.
15921       Load = DAG.getExtLoad(
15922           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
15923                                                   : LLD->getExtensionType(),
15924           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
15925           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
15926     }
15927
15928     // Users of the select now use the result of the load.
15929     CombineTo(TheSelect, Load);
15930
15931     // Users of the old loads now use the new load's chain.  We know the
15932     // old-load value is dead now.
15933     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
15934     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
15935     return true;
15936   }
15937
15938   return false;
15939 }
15940
15941 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
15942 /// bitwise 'and'.
      /// Returns the new AND node, or an empty SDValue if the pattern does not
      /// match.
15943 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
15944                                             SDValue N1, SDValue N2, SDValue N3,
15945                                             ISD::CondCode CC) {
15946   // If this is a select where the false operand is zero and the compare is a
15947   // check of the sign bit, see if we can perform the "gzip trick":
15948   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
15949   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
15950   EVT XType = N0.getValueType();
15951   EVT AType = N2.getValueType();
        // The compared value must be at least as wide as the selected value; the
        // shifted value is only ever truncated to AType below, never extended.
15952   if (!isNullConstant(N3) || !XType.bitsGE(AType))
15953     return SDValue();
15954
15955   // If the comparison is testing for a positive value, we have to invert
15956   // the sign bit mask, so only do that transform if the target has a bitwise
15957   // 'and not' instruction (the invert is free).
15958   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
15959     // (X > -1) ? A : 0
15960     // (X >  0) ? X : 0 <-- This is canonical signed max.
15961     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
15962       return SDValue();
15963   } else if (CC == ISD::SETLT) {
15964     // (X <  0) ? A : 0
15965     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
15966     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
15967       return SDValue();
15968   } else {
15969     return SDValue();
15970   }
15971
15972   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
15973   // constant.
        // The logical shift moves X's sign bit down to the position of A's set
        // bit.
        // NOTE(review): the (v & (v - 1)) == 0 test below is also satisfied by a
        // zero constant -- presumably N2 == 0 never reaches this point because N3
        // is zero as well; confirm against the caller.
15974   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
15975   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
15976   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
15977     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
15978     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
15979     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
15980     AddToWorklist(Shift.getNode());
15981
15982     if (XType.bitsGT(AType)) {
15983       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15984       AddToWorklist(Shift.getNode());
15985     }
15986
          // For the "positive" compare the mask has to be inverted.
15987     if (CC == ISD::SETGT)
15988       Shift = DAG.getNOT(DL, Shift, AType);
15989
15990     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15991   }
15992
        // General case: an arithmetic shift by size(X)-1 as in the header formulas
        // above, then the same truncate / invert / and steps as the constant case.
15993   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
15994   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
15995   AddToWorklist(Shift.getNode());
15996
15997   if (XType.bitsGT(AType)) {
15998     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15999     AddToWorklist(Shift.getNode());
16000   }
16001
16002   if (CC == ISD::SETGT)
16003     Shift = DAG.getNOT(DL, Shift, AType);
16004
16005   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16006 }
16007
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
///
/// The folds below are attempted in order and the first one that matches
/// wins.  Returns the replacement value, or a null SDValue() when nothing
/// applies.
///
/// \param DL  debug location used for most of the newly created nodes.
/// \param N0,N1  left and right operands of the comparison.
/// \param N2,N3  values selected when the comparison is true / false.
/// \param CC  the comparison predicate.
/// \param NotExtCompare  when true, a select between 1 and 0 is NOT rewritten
///        into a zero-extended setcc; the function returns a null SDValue()
///        for that case instead (and does not try the later folds).
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant.  Whatever the
  // setcc simplifies to (if anything) is queued for a later combine visit.
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Note the element order: the false value is element 0 and the true
        // value is element 1, matching the offset select built below.
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        // Condition true -> offset of element 1 (TV); false -> offset 0 (FV).
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only valid when the target represents a true setcc result as exactly 1.
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        // Types have not been legalized yet, so an i1 setcc result is used.
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      // select C, 1, 0 is just the extended compare; no shift is needed.
      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    // SubC ends up as the constant left operand of the "C - X" node, if the
    // select has one of the shapes listed above; it must be zero for "-X".
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      // NOTE: this local DL (location of N0) shadows the DL parameter for the
      // nodes created in this scope.
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  // (Here "sizeof(X)" is the width of the type in bits, see the check below.)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  // No fold applied.
  return SDValue();
}
16250
16251 /// This is a stub for TargetLowering::SimplifySetCC.
16252 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
16253                                    ISD::CondCode Cond, const SDLoc &DL,
16254                                    bool foldBooleans) {
16255   TargetLowering::DAGCombinerInfo
16256     DagCombineInfo(DAG, Level, false, this);
16257   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
16258 }
16259
16260 /// Given an ISD::SDIV node expressing a divide by constant, return
16261 /// a DAG expression to select that will generate the same value by multiplying
16262 /// by a magic number.
16263 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16264 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
16265   // when optimising for minimum size, we don't want to expand a div to a mul
16266   // and a shift.
16267   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16268     return SDValue();
16269
16270   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16271   if (!C)
16272     return SDValue();
16273
16274   // Avoid division by zero.
16275   if (C->isNullValue())
16276     return SDValue();
16277
16278   std::vector<SDNode*> Built;
16279   SDValue S =
16280       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16281
16282   for (SDNode *N : Built)
16283     AddToWorklist(N);
16284   return S;
16285 }
16286
16287 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
16288 /// DAG expression that will generate the same value by right shifting.
16289 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
16290   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16291   if (!C)
16292     return SDValue();
16293
16294   // Avoid division by zero.
16295   if (C->isNullValue())
16296     return SDValue();
16297
16298   std::vector<SDNode *> Built;
16299   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
16300
16301   for (SDNode *N : Built)
16302     AddToWorklist(N);
16303   return S;
16304 }
16305
16306 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
16307 /// expression that will generate the same value by multiplying by a magic
16308 /// number.
16309 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16310 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
16311   // when optimising for minimum size, we don't want to expand a div to a mul
16312   // and a shift.
16313   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16314     return SDValue();
16315
16316   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16317   if (!C)
16318     return SDValue();
16319
16320   // Avoid division by zero.
16321   if (C->isNullValue())
16322     return SDValue();
16323
16324   std::vector<SDNode*> Built;
16325   SDValue S =
16326       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16327
16328   for (SDNode *N : Built)
16329     AddToWorklist(N);
16330   return S;
16331 }
16332
16333 /// Determines the LogBase2 value for a non-null input value using the
16334 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
16335 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
16336   EVT VT = V.getValueType();
16337   unsigned EltBits = VT.getScalarSizeInBits();
16338   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
16339   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
16340   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
16341   return LogBase2;
16342 }
16343
16344 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16345 /// For the reciprocal, we need to find the zero of the function:
16346 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
16347 ///     =>
16348 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
16349 ///     does not require additional intermediate precision]
16350 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
16351   if (Level >= AfterLegalizeDAG)
16352     return SDValue();
16353
16354   // TODO: Handle half and/or extended types?
16355   EVT VT = Op.getValueType();
16356   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16357     return SDValue();
16358
16359   // If estimates are explicitly disabled for this function, we're done.
16360   MachineFunction &MF = DAG.getMachineFunction();
16361   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
16362   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16363     return SDValue();
16364
16365   // Estimates may be explicitly enabled for this type with a custom number of
16366   // refinement steps.
16367   int Iterations = TLI.getDivRefinementSteps(VT, MF);
16368   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
16369     AddToWorklist(Est.getNode());
16370
16371     if (Iterations) {
16372       EVT VT = Op.getValueType();
16373       SDLoc DL(Op);
16374       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16375
16376       // Newton iterations: Est = Est + Est (1 - Arg * Est)
16377       for (int i = 0; i < Iterations; ++i) {
16378         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
16379         AddToWorklist(NewEst.getNode());
16380
16381         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
16382         AddToWorklist(NewEst.getNode());
16383
16384         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16385         AddToWorklist(NewEst.getNode());
16386
16387         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
16388         AddToWorklist(Est.getNode());
16389       }
16390     }
16391     return Est;
16392   }
16393
16394   return SDValue();
16395 }
16396
16397 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16398 /// For the reciprocal sqrt, we need to find the zero of the function:
16399 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16400 ///     =>
16401 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
16402 /// As a result, we precompute A/2 prior to the iteration loop.
16403 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
16404                                          unsigned Iterations,
16405                                          SDNodeFlags Flags, bool Reciprocal) {
16406   EVT VT = Arg.getValueType();
16407   SDLoc DL(Arg);
16408   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
16409
16410   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
16411   // this entire sequence requires only one FP constant.
16412   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
16413   AddToWorklist(HalfArg.getNode());
16414
16415   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
16416   AddToWorklist(HalfArg.getNode());
16417
16418   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
16419   for (unsigned i = 0; i < Iterations; ++i) {
16420     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
16421     AddToWorklist(NewEst.getNode());
16422
16423     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
16424     AddToWorklist(NewEst.getNode());
16425
16426     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
16427     AddToWorklist(NewEst.getNode());
16428
16429     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16430     AddToWorklist(Est.getNode());
16431   }
16432
16433   // If non-reciprocal square root is requested, multiply the result by Arg.
16434   if (!Reciprocal) {
16435     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
16436     AddToWorklist(Est.getNode());
16437   }
16438
16439   return Est;
16440 }
16441
16442 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16443 /// For the reciprocal sqrt, we need to find the zero of the function:
16444 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16445 ///     =>
16446 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
16447 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
16448                                          unsigned Iterations,
16449                                          SDNodeFlags Flags, bool Reciprocal) {
16450   EVT VT = Arg.getValueType();
16451   SDLoc DL(Arg);
16452   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
16453   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
16454
16455   // This routine must enter the loop below to work correctly
16456   // when (Reciprocal == false).
16457   assert(Iterations > 0);
16458
16459   // Newton iterations for reciprocal square root:
16460   // E = (E * -0.5) * ((A * E) * E + -3.0)
16461   for (unsigned i = 0; i < Iterations; ++i) {
16462     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
16463     AddToWorklist(AE.getNode());
16464
16465     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
16466     AddToWorklist(AEE.getNode());
16467
16468     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
16469     AddToWorklist(RHS.getNode());
16470
16471     // When calculating a square root at the last iteration build:
16472     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
16473     // (notice a common subexpression)
16474     SDValue LHS;
16475     if (Reciprocal || (i + 1) < Iterations) {
16476       // RSQRT: LHS = (E * -0.5)
16477       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
16478     } else {
16479       // SQRT: LHS = (A * E) * -0.5
16480       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
16481     }
16482     AddToWorklist(LHS.getNode());
16483
16484     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
16485     AddToWorklist(Est.getNode());
16486   }
16487
16488   return Est;
16489 }
16490
16491 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
16492 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
16493 /// Op can be zero.
16494 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
16495                                            bool Reciprocal) {
16496   if (Level >= AfterLegalizeDAG)
16497     return SDValue();
16498
16499   // TODO: Handle half and/or extended types?
16500   EVT VT = Op.getValueType();
16501   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16502     return SDValue();
16503
16504   // If estimates are explicitly disabled for this function, we're done.
16505   MachineFunction &MF = DAG.getMachineFunction();
16506   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
16507   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16508     return SDValue();
16509
16510   // Estimates may be explicitly enabled for this type with a custom number of
16511   // refinement steps.
16512   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
16513
16514   bool UseOneConstNR = false;
16515   if (SDValue Est =
16516       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
16517                           Reciprocal)) {
16518     AddToWorklist(Est.getNode());
16519
16520     if (Iterations) {
16521       Est = UseOneConstNR
16522             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
16523             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
16524
16525       if (!Reciprocal) {
16526         // Unfortunately, Est is now NaN if the input was exactly 0.0.
16527         // Select out this case and force the answer to 0.0.
16528         EVT VT = Op.getValueType();
16529         SDLoc DL(Op);
16530
16531         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
16532         EVT CCVT = getSetCCResultType(VT);
16533         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
16534         AddToWorklist(ZeroCmp.getNode());
16535
16536         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
16537                           ZeroCmp, FPZero, Est);
16538         AddToWorklist(Est.getNode());
16539       }
16540     }
16541     return Est;
16542   }
16543
16544   return SDValue();
16545 }
16546
16547 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16548   return buildSqrtEstimateImpl(Op, Flags, true);
16549 }
16550
16551 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16552   return buildSqrtEstimateImpl(Op, Flags, false);
16553 }
16554
16555 /// Return true if base is a frame index, which is known not to alias with
16556 /// anything but itself.  Provides base object and offset as results.
16557 static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
16558                            const GlobalValue *&GV, const void *&CV) {
16559   // Assume it is a primitive operation.
16560   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
16561
16562   // If it's an adding a simple constant then integrate the offset.
16563   if (Base.getOpcode() == ISD::ADD) {
16564     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
16565       Base = Base.getOperand(0);
16566       Offset += C->getSExtValue();
16567     }
16568   }
16569
16570   // Return the underlying GlobalValue, and update the Offset.  Return false
16571   // for GlobalAddressSDNode since the same GlobalAddress may be represented
16572   // by multiple nodes with different offsets.
16573   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
16574     GV = G->getGlobal();
16575     Offset += G->getOffset();
16576     return false;
16577   }
16578
16579   // Return the underlying Constant value, and update the Offset.  Return false
16580   // for ConstantSDNodes since the same constant pool entry may be represented
16581   // by multiple nodes with different offsets.
16582   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
16583     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
16584                                          : (const void *)C->getConstVal();
16585     Offset += C->getOffset();
16586     return false;
16587   }
16588   // If it's any of the following then it can't alias with anything but itself.
16589   return isa<FrameIndexSDNode>(Base);
16590 }
16591
16592 /// Return true if there is any possibility that the two addresses overlap.
16593 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
16594   // If they are the same then they must be aliases.
16595   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
16596
16597   // If they are both volatile then they cannot be reordered.
16598   if (Op0->isVolatile() && Op1->isVolatile()) return true;
16599
16600   // If one operation reads from invariant memory, and the other may store, they
16601   // cannot alias. These should really be checking the equivalent of mayWrite,
16602   // but it only matters for memory nodes other than load /store.
16603   if (Op0->isInvariant() && Op1->writeMem())
16604     return false;
16605
16606   if (Op1->isInvariant() && Op0->writeMem())
16607     return false;
16608
16609   unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
16610   unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
16611
16612   // Check for BaseIndexOffset matching.
16613   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
16614   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
16615   if (BasePtr0.equalBaseIndex(BasePtr1))
16616     return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) ||
16617              (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset));
16618
16619   // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
16620   // modified to use BaseIndexOffset.
16621
16622   // Gather base node and offset information.
16623   SDValue Base0, Base1;
16624   int64_t Offset0, Offset1;
16625   const GlobalValue *GV0, *GV1;
16626   const void *CV0, *CV1;
16627   bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
16628                                       Base0, Offset0, GV0, CV0);
16629   bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
16630                                       Base1, Offset1, GV1, CV1);
16631
16632   // If they have the same base address, then check to see if they overlap.
16633   if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
16634     return !((Offset0 + NumBytes0) <= Offset1 ||
16635              (Offset1 + NumBytes1) <= Offset0);
16636
16637   // It is possible for different frame indices to alias each other, mostly
16638   // when tail call optimization reuses return address slots for arguments.
16639   // To catch this case, look up the actual index of frame indices to compute
16640   // the real alias relationship.
16641   if (IsFrameIndex0 && IsFrameIndex1) {
16642     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16643     Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
16644     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
16645     return !((Offset0 + NumBytes0) <= Offset1 ||
16646              (Offset1 + NumBytes1) <= Offset0);
16647   }
16648
16649   // Otherwise, if we know what the bases are, and they aren't identical, then
16650   // we know they cannot alias.
16651   if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
16652     return false;
16653
16654   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
16655   // compared to the size and offset of the access, we may be able to prove they
16656   // do not alias. This check is conservative for now to catch cases created by
16657   // splitting vector types.
16658   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
16659   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
16660   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
16661   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
16662   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
16663       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
16664     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
16665     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
16666
16667     // There is no overlap between these relatively aligned accesses of similar
16668     // size. Return no alias.
16669     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
16670         (OffAlign1 + NumBytes1) <= OffAlign0)
16671       return false;
16672   }
16673
16674   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
16675                    ? CombinerGlobalAA
16676                    : DAG.getSubtarget().useAA();
16677 #ifndef NDEBUG
16678   if (CombinerAAOnlyFunc.getNumOccurrences() &&
16679       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
16680     UseAA = false;
16681 #endif
16682
16683   if (UseAA && AA &&
16684       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
16685     // Use alias analysis information.
16686     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
16687     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
16688     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
16689     AliasResult AAResult =
16690         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
16691                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
16692                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
16693                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
16694     if (AAResult == NoAlias)
16695       return false;
16696   }
16697
16698   // Otherwise we have to assume they alias.
16699   return true;
16700 }
16701
16702 /// Walk up the chain from OriginalChain past memory nodes that need no ordering
16703 /// against N, appending to Aliases every node the walk cannot look past.
16704 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
16705                                    SmallVectorImpl<SDValue> &Aliases) {
16706   SmallVector<SDValue, 8> Chains;     // Worklist of chains, used as a stack.
16707   SmallPtrSet<SDNode *, 16> Visited;  // Nodes already popped and examined.
16708
16709   // N is treated as a load only when it is a non-volatile LoadSDNode.
16710   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
16711
16712   // Seed the worklist with the chain we were asked to improve.
16713   Chains.push_back(OriginalChain);
16714   unsigned Depth = 0; // Counts look-through steps, not a true path depth.
16715
16716   // Pop chains until the worklist is empty.  A chain that may alias N, or
16717   // that we cannot look through, is recorded in Aliases; otherwise we keep
16718   // walking up through its own chain operand(s).
16719   while (!Chains.empty()) {
16720     SDValue Chain = Chains.pop_back_val();
16721
16722     // Give up once more than TLI.getGatherAllAliasesMaxDepth() look-through
16723     // steps have been taken: everything in Aliases is discarded and
16724     // OriginalChain is reported as the only alias.
16725     // FIXME: The depth check could be made to return the last non-aliasing
16726     // chain we found before we hit a tokenfactor rather than the original
16727     // chain.
16728     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
16729       Aliases.clear();
16730       Aliases.push_back(OriginalChain);
16731       return;
16732     }
16733
16734     // Skip nodes that were already examined when reached via another path.
16735     if (!Visited.insert(Chain.getNode()).second)
16736       continue;
16737
16738     switch (Chain.getOpcode()) {
16739     case ISD::EntryToken:
16740       // Record nothing: with Aliases empty, FindBetterChain uses the entry node.
16741       break;
16742
16743     case ISD::LOAD:
16744     case ISD::STORE: {
16745       // Chain is treated as a load only when it is a non-volatile LoadSDNode.
16746       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
16747           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
16748
16749       // Stop here unless both are non-volatile loads or isAlias() rules it out.
16750       if (!(IsLoad && IsOpLoad) &&
16751           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
16752         Aliases.push_back(Chain);
16753       } else {
16754         // No ordering needed: continue with this node's own chain (operand 0).
16755         Chains.push_back(Chain.getOperand(0));
16756         ++Depth;
16757       }
16758       break;
16759     }
16760
16761     case ISD::TokenFactor:
16762       // Token factors with more than 16 operands are not looked through; they
16763       // are recorded as an alias as-is.  Smaller ones have every operand queued.
16764       // Pushing in reverse makes the stack pop them in their original order,
16765       // which increases the likelihood that getNode will CSE the token factor.
16766       if (Chain.getNumOperands() > 16) {
16767         Aliases.push_back(Chain);
16768         break;
16769       }
16770       for (unsigned n = Chain.getNumOperands(); n;)
16771         Chains.push_back(Chain.getOperand(--n));
16772       ++Depth;
16773       break;
16774
16775     case ISD::CopyFromReg:
16776       // Look through CopyFromReg and continue with its operand 0.
16777       Chains.push_back(Chain.getOperand(0));
16778       ++Depth;
16779       break;
16780
16781     default:
16782       // Any other node is conservatively kept as an alias; we never look past it.
16783       Aliases.push_back(Chain);
16784       break;
16785     }
16786   }
16787 }
16788
16789 /// Pick the chain N should hang off: gather every node reachable from OldChain
16790 /// that N must stay ordered after, then return the matching chain value.
16791 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
16792   SmallVector<SDValue, 8> MayAlias; // Operands for the replacement chain.
16793
16794   // Collect everything N has to remain ordered after.
16795   GatherAllAliases(N, OldChain, MayAlias);
16796
16797   // Nothing was gathered: N can be chained directly to the entry token.
16798   if (MayAlias.empty())
16799     return DAG.getEntryNode();
16800
16801   // Several aliases: tie them together with a custom tailored token factor.
16802   if (MayAlias.size() > 1)
16803     return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, MayAlias);
16804
16805   // Exactly one alias: chain straight to it.  We don't need to revisit it.
16806   return MayAlias.front();
16807 }
16808
16809 // Collect St together with the stores found by walking up its chain, and
16810 // try to improve the chains of all of them at once.  Returns true only
16811 // if St's own chain was replaced.  This might seem
16812 // redundant, as this function gets called when visiting every store
16813 // node, so why not let the work be done on each store as it's visited?
16814 // I believe this is mainly important because MergeConsecutiveStores
16815 // is unable to deal with merging stores of different sizes, so unless
16816 // we improve the chains of all the potential candidates up-front
16817 // before running MergeConsecutiveStores, it might only see some of
16818 // the nodes that will eventually be candidates, and then not be able
16819 // to go from a partially-merged state to the desired final
16820 // fully-merged state.
16821 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
16822   // Decompose St's address; the result holds the base pointer, index, and the
16823   // offset in bytes from the base pointer.
16824   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
16825
16826   // Give up if no base pointer node was identified.
16827   if (!BasePtr.Base.getNode())
16828     return false;
16829
16830   // Do not handle stores to undef base pointers.
16831   if (BasePtr.Base.isUndef())
16832     return false;
16833
16834   SmallVector<StoreSDNode *, 8> ChainedStores;
16835   ChainedStores.push_back(St);
16836
16837   // Walk up the chain collecting stores.  The walk ends at a store that is
16838   // volatile, indexed, or fails equalBaseIndex() against St's address, and at
16839   // any chain node that is neither a load nor a store.
16840   StoreSDNode *Index = St;
16841   while (Index) {
16842     // If the chain has more than one use, then we can't reorder the mem ops.
16843     if (Index != St && !SDValue(Index, 0)->hasOneUse())
16844       break;
16845
16846     if (Index->isVolatile() || Index->isIndexed())
16847       break;
16848
16849     // Decompose the address of the store currently being examined.
16850     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
16851
16852     // Stop unless it matches St's address according to equalBaseIndex().
16853     if (!Ptr.equalBaseIndex(BasePtr))
16854       break;
16855
16856     // Walk up the chain to find the next store node, stepping over any
16857     // intermediate loads.  Any other kind of node ends the outer loop too.
16858     SDNode *NextInChain = Index->getChain().getNode();
16859     while (true) {
16860       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
16861         // Found a store: end the whole walk if it is volatile or indexed.
16862         if (STn->isVolatile() || STn->isIndexed()) {
16863           Index = nullptr;
16864           break;
16865         }
16866         ChainedStores.push_back(STn); // Recorded before its base is compared.
16867         Index = STn;
16868         break;
16869       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
16870         NextInChain = Ldn->getChain().getNode();
16871         continue;
16872       } else {
16873         Index = nullptr;
16874         break;
16875       }
16876     } // end of the inner walk up to the next store
16877   }
16878
16879   // At this point, ChainedStores holds St followed by every store
16880   // recorded by the walk above, in the order they were found going up
16881   // the chain.  For each such store, try to find an
16882   // earlier chain to attach the store to which won't violate the
16883   // required ordering.
16884   bool MadeChangeToSt = false;
16885   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
16886
16887   for (StoreSDNode *ChainedStore : ChainedStores) {
16888     SDValue Chain = ChainedStore->getChain();
16889     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
16890
16891     if (Chain != BetterChain) {
16892       if (ChainedStore == St)
16893         MadeChangeToSt = true;
16894       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
16895     }
16896   }
16897
16898   // Apply the replacements only after all of them have been computed, to avoid
16899   // making the chains more complicated by introducing new TokenFactors.
16900   for (auto Replacement : BetterChains)
16901     replaceStoreChain(Replacement.first, Replacement.second);
16902
16903   return MadeChangeToSt;
16904 }
16905
16906 /// Entry point for this file: run a DAGCombiner over this DAG at \p Level.
16907 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
16908                            CodeGenOpt::Level OptLevel) {
16909   DAGCombiner Combiner(*this, AA, OptLevel);
16910   Combiner.Run(Level);
16911 }