contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  11 // both before and after the DAG is legalized.
  12 //
  13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  14 // primarily intended to handle simplification opportunities that are implicit
  15 // in the LLVM IR and exposed by the various codegen lowering phases.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #include "llvm/ADT/APFloat.h"
  20 #include "llvm/ADT/APInt.h"
  21 #include "llvm/ADT/ArrayRef.h"
  22 #include "llvm/ADT/DenseMap.h"
  23 #include "llvm/ADT/IntervalMap.h"
  24 #include "llvm/ADT/None.h"
  25 #include "llvm/ADT/Optional.h"
  26 #include "llvm/ADT/STLExtras.h"
  27 #include "llvm/ADT/SetVector.h"
  28 #include "llvm/ADT/SmallBitVector.h"
  29 #include "llvm/ADT/SmallPtrSet.h"
  30 #include "llvm/ADT/SmallSet.h"
  31 #include "llvm/ADT/SmallVector.h"
  32 #include "llvm/ADT/Statistic.h"
  33 #include "llvm/Analysis/AliasAnalysis.h"
  34 #include "llvm/Analysis/MemoryLocation.h"
  35 #include "llvm/CodeGen/DAGCombine.h"
  36 #include "llvm/CodeGen/ISDOpcodes.h"
  37 #include "llvm/CodeGen/MachineFrameInfo.h"
  38 #include "llvm/CodeGen/MachineFunction.h"
  39 #include "llvm/CodeGen/MachineMemOperand.h"
  40 #include "llvm/CodeGen/RuntimeLibcalls.h"
  41 #include "llvm/CodeGen/SelectionDAG.h"
  42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  43 #include "llvm/CodeGen/SelectionDAGNodes.h"
  44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  45 #include "llvm/CodeGen/TargetLowering.h"
  46 #include "llvm/CodeGen/TargetRegisterInfo.h"
  47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  48 #include "llvm/CodeGen/ValueTypes.h"
  49 #include "llvm/IR/Attributes.h"
  50 #include "llvm/IR/Constant.h"
  51 #include "llvm/IR/DataLayout.h"
  52 #include "llvm/IR/DerivedTypes.h"
  53 #include "llvm/IR/Function.h"
  54 #include "llvm/IR/LLVMContext.h"
  55 #include "llvm/IR/Metadata.h"
  56 #include "llvm/Support/Casting.h"
  57 #include "llvm/Support/CodeGen.h"
  58 #include "llvm/Support/CommandLine.h"
  59 #include "llvm/Support/Compiler.h"
  60 #include "llvm/Support/Debug.h"
  61 #include "llvm/Support/ErrorHandling.h"
  62 #include "llvm/Support/KnownBits.h"
  63 #include "llvm/Support/MachineValueType.h"
  64 #include "llvm/Support/MathExtras.h"
  65 #include "llvm/Support/raw_ostream.h"
  66 #include "llvm/Target/TargetMachine.h"
  67 #include "llvm/Target/TargetOptions.h"
  68 #include <algorithm>
  69 #include <cassert>
  70 #include <cstdint>
  71 #include <functional>
  72 #include <iterator>
  73 #include <string>
  74 #include <tuple>
  75 #include <utility>
  76
  77 using namespace llvm;
  78
  79 #define DEBUG_TYPE "dagcombine"
  80
  81 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  84 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  85 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  86 STATISTIC(SlicedLoads, "Number of load sliced");
  87 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  88
  89 static cl::opt<bool>
  90 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  91                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  92
  93 static cl::opt<bool>
  94 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  95         cl::desc("Enable DAG combiner's use of TBAA"));
  96
  97 #ifndef NDEBUG
  98 static cl::opt<std::string>
  99 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
 100                    cl::desc("Only use DAG-combiner alias analysis in this"
 101                             " function"));
 102 #endif
 103
 104 /// Hidden option to stress test load slicing, i.e., when this option
 105 /// is enabled, load slicing bypasses most of its profitability guards.
 106 static cl::opt<bool>
 107 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 108                   cl::desc("Bypass the profitability model of load slicing"),
 109                   cl::init(false));
 110
 111 static cl::opt<bool>
 112   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 113                     cl::desc("DAG combiner may split indexing from loads"));
 114
 115 namespace {
 116
 117   class DAGCombiner {
 118     SelectionDAG &DAG;
 119     const TargetLowering &TLI;
 120     CombineLevel Level;
 121     CodeGenOpt::Level OptLevel;
 122     bool LegalOperations = false;
 123     bool LegalTypes = false;
 124     bool ForCodeSize;
 125
 126     /// Worklist of all of the nodes that need to be simplified.
 127     ///
 128     /// This must behave as a stack -- new nodes to process are pushed onto the
 129     /// back and when processing we pop off of the back.
 130     ///
 131     /// The worklist will not contain duplicates but may contain null entries
 132     /// due to nodes being deleted from the underlying DAG.
 133     SmallVector<SDNode *, 64> Worklist;
 134
 135     /// Mapping from an SDNode to its position on the worklist.
 136     ///
 137     /// This is used to find and remove nodes from the worklist (by nulling
 138     /// them) when they are deleted from the underlying DAG. It relies on
 139     /// stable indices of nodes within the worklist.
 140     DenseMap<SDNode *, unsigned> WorklistMap;
 141
 142     /// Set of nodes which have been combined (at least once).
 143     ///
 144     /// This is used to allow us to reliably add any operands of a DAG node
 145     /// which have not yet been combined to the worklist.
 146     SmallPtrSet<SDNode *, 32> CombinedNodes;
 147
 148     // AA - Used for DAG load/store alias analysis.
 149     AliasAnalysis *AA;
 150
 151     /// When an instruction is simplified, add all users of the instruction to
 152     /// the work lists because they might get more simplified now.
 153     void AddUsersToWorklist(SDNode *N) {
 154       for (SDNode *Node : N->uses())
 155         AddToWorklist(Node);
 156     }
 157
 158     /// Call the node-specific routine that folds each particular type of node.
 159     SDValue visit(SDNode *N);
 160
 161   public:
 162     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 163         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 164           OptLevel(OL), AA(AA) {
 165       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
 166
 167       MaximumLegalStoreInBits = 0;
 168       for (MVT VT : MVT::all_valuetypes())
 169         if (EVT(VT).isSimple() && VT != MVT::Other &&
 170             TLI.isTypeLegal(EVT(VT)) &&
 171             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 172           MaximumLegalStoreInBits = VT.getSizeInBits();
 173     }
 174
 175     /// Add to the worklist making sure its instance is at the back (next to be
 176     /// processed.)
 177     void AddToWorklist(SDNode *N) {
 178       assert(N->getOpcode() != ISD::DELETED_NODE &&
 179              "Deleted Node added to Worklist");
 180
 181       // Skip handle nodes as they can't usefully be combined and confuse the
 182       // zero-use deletion strategy.
 183       if (N->getOpcode() == ISD::HANDLENODE)
 184         return;
 185
 186       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 187         Worklist.push_back(N);
 188     }
 189
 190     /// Remove all instances of N from the worklist.
 191     void removeFromWorklist(SDNode *N) {
 192       CombinedNodes.erase(N);
 193
 194       auto It = WorklistMap.find(N);
 195       if (It == WorklistMap.end())
 196         return; // Not in the worklist.
 197
 198       // Null out the entry rather than erasing it to avoid a linear operation.
 199       Worklist[It->second] = nullptr;
 200       WorklistMap.erase(It);
 201     }
 202
 203     void deleteAndRecombine(SDNode *N);
 204     bool recursivelyDeleteUnusedNodes(SDNode *N);
 205
 206     /// Replaces all uses of the results of one DAG node with new values.
 207     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 208                       bool AddTo = true);
 209
 210     /// Replaces all uses of the results of one DAG node with new values.
 211     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 212       return CombineTo(N, &Res, 1, AddTo);
 213     }
 214
 215     /// Replaces all uses of the results of one DAG node with new values.
 216     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 217                       bool AddTo = true) {
 218       SDValue To[] = { Res0, Res1 };
 219       return CombineTo(N, To, 2, AddTo);
 220     }
 221
 222     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 223
 224   private:
 225     unsigned MaximumLegalStoreInBits;
 226
 227     /// Check the specified integer node value to see if it can be simplified or
 228     /// if things it uses can be simplified by bit propagation.
 229     /// If so, return true.
 230     bool SimplifyDemandedBits(SDValue Op) {
 231       unsigned BitWidth = Op.getScalarValueSizeInBits();
 232       APInt Demanded = APInt::getAllOnesValue(BitWidth);
 233       return SimplifyDemandedBits(Op, Demanded);
 234     }
 235
 236     /// Check the specified vector node value to see if it can be simplified or
 237     /// if things it uses can be simplified as it only uses some of the
 238     /// elements. If so, return true.
 239     bool SimplifyDemandedVectorElts(SDValue Op) {
 240       unsigned NumElts = Op.getValueType().getVectorNumElements();
 241       APInt Demanded = APInt::getAllOnesValue(NumElts);
 242       return SimplifyDemandedVectorElts(Op, Demanded);
 243     }
 244
 245     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
 246     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
 247                                     bool AssumeSingleUse = false);
 248
 249     bool CombineToPreIndexedLoadStore(SDNode *N);
 250     bool CombineToPostIndexedLoadStore(SDNode *N);
 251     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 252     bool SliceUpLoad(SDNode *N);
 253
 254     // Scalars have size 0 to distinguish from singleton vectors.
 255     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 256     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 257     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 258
 259     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 260     ///   load.
 261     ///
 262     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 263     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 264     /// \param EltNo index of the vector element to load.
 265     /// \param OriginalLoad load that EVE came from to be replaced.
 266     /// \returns EVE on success SDValue() on failure.
 267     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 268                                          SDValue EltNo,
 269                                          LoadSDNode *OriginalLoad);
 270     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 271     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 272     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 273     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 274     SDValue PromoteIntBinOp(SDValue Op);
 275     SDValue PromoteIntShiftOp(SDValue Op);
 276     SDValue PromoteExtend(SDValue Op);
 277     bool PromoteLoad(SDValue Op);
 278
 279     /// Call the node-specific routine that knows how to fold each
 280     /// particular type of node. If that doesn't do anything, try the
 281     /// target-specific DAG combines.
 282     SDValue combine(SDNode *N);
 283
 284     // Visitation implementation - Implement dag node combining for different
 285     // node types.  The semantics are as follows:
 286     // Return Value:
 287     //   SDValue.getNode() == 0 - No change was made
 288     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 289     //   otherwise              - N should be replaced by the returned Operand.
 290     //
 291     SDValue visitTokenFactor(SDNode *N);
 292     SDValue visitMERGE_VALUES(SDNode *N);
 293     SDValue visitADD(SDNode *N);
 294     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
 295     SDValue visitSUB(SDNode *N);
 296     SDValue visitADDSAT(SDNode *N);
 297     SDValue visitSUBSAT(SDNode *N);
 298     SDValue visitADDC(SDNode *N);
 299     SDValue visitUADDO(SDNode *N);
 300     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 301     SDValue visitSUBC(SDNode *N);
 302     SDValue visitUSUBO(SDNode *N);
 303     SDValue visitADDE(SDNode *N);
 304     SDValue visitADDCARRY(SDNode *N);
 305     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 306     SDValue visitSUBE(SDNode *N);
 307     SDValue visitSUBCARRY(SDNode *N);
 308     SDValue visitMUL(SDNode *N);
 309     SDValue useDivRem(SDNode *N);
 310     SDValue visitSDIV(SDNode *N);
 311     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 312     SDValue visitUDIV(SDNode *N);
 313     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 314     SDValue visitREM(SDNode *N);
 315     SDValue visitMULHU(SDNode *N);
 316     SDValue visitMULHS(SDNode *N);
 317     SDValue visitSMUL_LOHI(SDNode *N);
 318     SDValue visitUMUL_LOHI(SDNode *N);
 319     SDValue visitSMULO(SDNode *N);
 320     SDValue visitUMULO(SDNode *N);
 321     SDValue visitIMINMAX(SDNode *N);
 322     SDValue visitAND(SDNode *N);
 323     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 324     SDValue visitOR(SDNode *N);
 325     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 326     SDValue visitXOR(SDNode *N);
 327     SDValue SimplifyVBinOp(SDNode *N);
 328     SDValue visitSHL(SDNode *N);
 329     SDValue visitSRA(SDNode *N);
 330     SDValue visitSRL(SDNode *N);
 331     SDValue visitFunnelShift(SDNode *N);
 332     SDValue visitRotate(SDNode *N);
 333     SDValue visitABS(SDNode *N);
 334     SDValue visitBSWAP(SDNode *N);
 335     SDValue visitBITREVERSE(SDNode *N);
 336     SDValue visitCTLZ(SDNode *N);
 337     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 338     SDValue visitCTTZ(SDNode *N);
 339     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 340     SDValue visitCTPOP(SDNode *N);
 341     SDValue visitSELECT(SDNode *N);
 342     SDValue visitVSELECT(SDNode *N);
 343     SDValue visitSELECT_CC(SDNode *N);
 344     SDValue visitSETCC(SDNode *N);
 345     SDValue visitSETCCCARRY(SDNode *N);
 346     SDValue visitSIGN_EXTEND(SDNode *N);
 347     SDValue visitZERO_EXTEND(SDNode *N);
 348     SDValue visitANY_EXTEND(SDNode *N);
 349     SDValue visitAssertExt(SDNode *N);
 350     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 351     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 352     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 353     SDValue visitTRUNCATE(SDNode *N);
 354     SDValue visitBITCAST(SDNode *N);
 355     SDValue visitBUILD_PAIR(SDNode *N);
 356     SDValue visitFADD(SDNode *N);
 357     SDValue visitFSUB(SDNode *N);
 358     SDValue visitFMUL(SDNode *N);
 359     SDValue visitFMA(SDNode *N);
 360     SDValue visitFDIV(SDNode *N);
 361     SDValue visitFREM(SDNode *N);
 362     SDValue visitFSQRT(SDNode *N);
 363     SDValue visitFCOPYSIGN(SDNode *N);
 364     SDValue visitFPOW(SDNode *N);
 365     SDValue visitSINT_TO_FP(SDNode *N);
 366     SDValue visitUINT_TO_FP(SDNode *N);
 367     SDValue visitFP_TO_SINT(SDNode *N);
 368     SDValue visitFP_TO_UINT(SDNode *N);
 369     SDValue visitFP_ROUND(SDNode *N);
 370     SDValue visitFP_ROUND_INREG(SDNode *N);
 371     SDValue visitFP_EXTEND(SDNode *N);
 372     SDValue visitFNEG(SDNode *N);
 373     SDValue visitFABS(SDNode *N);
 374     SDValue visitFCEIL(SDNode *N);
 375     SDValue visitFTRUNC(SDNode *N);
 376     SDValue visitFFLOOR(SDNode *N);
 377     SDValue visitFMINNUM(SDNode *N);
 378     SDValue visitFMAXNUM(SDNode *N);
 379     SDValue visitFMINIMUM(SDNode *N);
 380     SDValue visitFMAXIMUM(SDNode *N);
 381     SDValue visitBRCOND(SDNode *N);
 382     SDValue visitBR_CC(SDNode *N);
 383     SDValue visitLOAD(SDNode *N);
 384
 385     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 386     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 387
 388     SDValue visitSTORE(SDNode *N);
 389     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 390     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 391     SDValue visitBUILD_VECTOR(SDNode *N);
 392     SDValue visitCONCAT_VECTORS(SDNode *N);
 393     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 394     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 395     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 396     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 397     SDValue visitMLOAD(SDNode *N);
 398     SDValue visitMSTORE(SDNode *N);
 399     SDValue visitMGATHER(SDNode *N);
 400     SDValue visitMSCATTER(SDNode *N);
 401     SDValue visitFP_TO_FP16(SDNode *N);
 402     SDValue visitFP16_TO_FP(SDNode *N);
 403
 404     SDValue visitFADDForFMACombine(SDNode *N);
 405     SDValue visitFSUBForFMACombine(SDNode *N);
 406     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 407
 408     SDValue XformToShuffleWithZero(SDNode *N);
 409     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 410                            SDValue N1, SDNodeFlags Flags);
 411
 412     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 413
 414     SDValue foldSelectOfConstants(SDNode *N);
 415     SDValue foldVSelectOfConstants(SDNode *N);
 416     SDValue foldBinOpIntoSelect(SDNode *BO);
 417     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 418     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 419     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 420     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 421                              SDValue N2, SDValue N3, ISD::CondCode CC,
 422                              bool NotExtCompare = false);
 423     SDValue convertSelectOfFPConstantsToLoadOffset(
 424         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 425         ISD::CondCode CC);
 426     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 427                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 428     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 429                               const SDLoc &DL);
 430     SDValue unfoldMaskedMerge(SDNode *N);
 431     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 432     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 433                           const SDLoc &DL, bool foldBooleans);
 434     SDValue rebuildSetCC(SDValue N);
 435
 436     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 437                            SDValue &CC) const;
 438     bool isOneUseSetCC(SDValue N) const;
 439
 440     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 441                                          unsigned HiOp);
 442     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 443     SDValue CombineExtLoad(SDNode *N);
 444     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 445     SDValue combineRepeatedFPDivisors(SDNode *N);
 446     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 447     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 448     SDValue BuildSDIV(SDNode *N);
 449     SDValue BuildSDIVPow2(SDNode *N);
 450     SDValue BuildUDIV(SDNode *N);
 451     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 452     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
 453     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 454     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 455     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 456     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 457                                 SDNodeFlags Flags, bool Reciprocal);
 458     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 459                                 SDNodeFlags Flags, bool Reciprocal);
 460     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 461                                bool DemandHighBits = true);
 462     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 463     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 464                               SDValue InnerPos, SDValue InnerNeg,
 465                               unsigned PosOpcode, unsigned NegOpcode,
 466                               const SDLoc &DL);
 467     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 468     SDValue MatchLoadCombine(SDNode *N);
 469     SDValue ReduceLoadWidth(SDNode *N);
 470     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 471     SDValue splitMergedValStore(StoreSDNode *ST);
 472     SDValue TransformFPLoadStorePair(SDNode *N);
 473     SDValue convertBuildVecZextToZext(SDNode *N);
 474     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 475     SDValue reduceBuildVecToShuffle(SDNode *N);
 476     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 477                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 478                                   SDValue VecIn2, unsigned LeftIdx);
 479     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 480
 481     /// Walk up chain skipping non-aliasing memory nodes,
 482     /// looking for aliasing nodes and adding them to the Aliases vector.
 483     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 484                           SmallVectorImpl<SDValue> &Aliases);
 485
 486     /// Return true if there is any possibility that the two addresses overlap.
 487     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
 488
 489     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 490     /// chain (aliasing node.)
 491     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 492
 493     /// Try to replace a store and any possibly adjacent stores on
 494     /// consecutive chains with better chains. Return true only if St is
 495     /// replaced.
 496     ///
 497     /// Notice that other chains may still be replaced even if the function
 498     /// returns false.
 499     bool findBetterNeighborChains(StoreSDNode *St);
 500
 501     // Helper for findBetterNeighborChains. Walk up store chain add additional
 502     // chained stores that do not overlap and can be parallelized.
 503     bool parallelizeChainedStores(StoreSDNode *St);
 504
 505     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 506     /// is located in a sequence of memory operations connected by a chain.
 507     struct MemOpLink {
 508       // Ptr to the mem node.
 509       LSBaseSDNode *MemNode;
 510
 511       // Offset from the base ptr.
 512       int64_t OffsetFromBase;
 513
 514       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 515           : MemNode(N), OffsetFromBase(Offset) {}
 516     };
 517
 518     /// This is a helper function for visitMUL to check the profitability
 519     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 520     /// MulNode is the original multiply, AddNode is (add x, c1),
 521     /// and ConstNode is c2.
 522     bool isMulAddWithConstProfitable(SDNode *MulNode,
 523                                      SDValue &AddNode,
 524                                      SDValue &ConstNode);
 525
 526     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 527     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 528     /// the type of the loaded value to be extended.
 529     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 530                           EVT LoadResultTy, EVT &ExtVT);
 531
 532     /// Helper function to calculate whether the given Load/Store can have its
 533     /// width reduced to ExtVT.
 534     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 535                            EVT &MemVT, unsigned ShAmt = 0);
 536
 537     /// Used by BackwardsPropagateMask to find suitable loads.
 538     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 539                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 540                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 541     /// Attempt to propagate a given AND node back to load leaves so that they
 542     /// can be combined into narrow loads.
 543     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 544
 545     /// Helper function for MergeConsecutiveStores which merges the
 546     /// component store chains.
 547     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 548                                 unsigned NumStores);
 549
 550     /// This is a helper function for MergeConsecutiveStores. When the
 551     /// source elements of the consecutive stores are all constants or
 552     /// all extracted vector elements, try to merge them into one
 553     /// larger store introducing bitcasts if necessary.  \return True
 554     /// if a merged store was created.
 555     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 556                                          EVT MemVT, unsigned NumStores,
 557                                          bool IsConstantSrc, bool UseVector,
 558                                          bool UseTrunc);
 559
 560     /// This is a helper function for MergeConsecutiveStores. Stores
 561     /// that potentially may be merged with St are placed in
 562     /// StoreNodes. RootNode is a chain predecessor to all store
 563     /// candidates.
 564     void getStoreMergeCandidates(StoreSDNode *St,
 565                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 566                                  SDNode *&Root);
 567
 568     /// Helper function for MergeConsecutiveStores. Checks if
 569     /// candidate stores have indirect dependency through their
 570     /// operands. RootNode is the predecessor to all stores calculated
 571     /// by getStoreMergeCandidates and is used to prune the dependency check.
 572     /// \return True if safe to merge.
 573     bool checkMergeStoreCandidatesForDependencies(
 574         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 575         SDNode *RootNode);
 576
 577     /// Merge consecutive store operations into a wide store.
 578     /// This optimization uses wide integers or vectors when possible.
 579     /// \return number of stores that were merged into a merged store (the
 580     /// affected nodes are stored as a prefix in \p StoreNodes).
 581     bool MergeConsecutiveStores(StoreSDNode *St);
 582
 583     /// Try to transform a truncation where C is a constant:
 584     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 585     ///
 586     /// \p N needs to be a truncation and its first operand an AND. Other
 587     /// requirements are checked by the function (e.g. that trunc is
 588     /// single-use) and if missed an empty SDValue is returned.
 589     SDValue distributeTruncateThroughAnd(SDNode *N);
 590
 591     /// Helper function to determine whether the target supports operation
 592     /// given by \p Opcode for type \p VT, that is, whether the operation
 593     /// is legal or custom before legalizing operations, and whether is
 594     /// legal (but not custom) after legalization.
 595     bool hasOperation(unsigned Opcode, EVT VT) {
 596       if (LegalOperations)
 597         return TLI.isOperationLegal(Opcode, VT);
 598       return TLI.isOperationLegalOrCustom(Opcode, VT);
 599     }
 600
 601   public:
 602     /// Runs the dag combiner on all nodes in the work list
 603     void Run(CombineLevel AtLevel);
 604
 605     SelectionDAG &getDAG() const { return DAG; }
 606
 607     /// Returns a type large enough to hold any valid shift amount - before type
 608     /// legalization these can be huge.
 609     EVT getShiftAmountTy(EVT LHSTy) {
 610       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 611       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 612     }
 613
 614     /// This method returns true if we are running before type legalization or
 615     /// if the specified VT is legal.
 616     bool isTypeLegal(const EVT &VT) {
 617       if (!LegalTypes) return true;
 618       return TLI.isTypeLegal(VT);
 619     }
 620
 621     /// Convenience wrapper around TargetLowering::getSetCCResultType
 622     EVT getSetCCResultType(EVT VT) const {
 623       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 624     }
 625
 626     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
 627                          SDValue OrigLoad, SDValue ExtLoad,
 628                          ISD::NodeType ExtType);
 629   };
 630
 631 /// This class is a DAGUpdateListener that removes any deleted
 632 /// nodes from the worklist.
 633 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 634   DAGCombiner &DC;
 635
 636 public:
 637   explicit WorklistRemover(DAGCombiner &dc)
 638     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 639
 640   void NodeDeleted(SDNode *N, SDNode *E) override {
 641     DC.removeFromWorklist(N);
 642   }
 643 };
 644
 645 } // end anonymous namespace
 646
 647 //===----------------------------------------------------------------------===//
 648 //  TargetLowering::DAGCombinerInfo implementation
 649 //===----------------------------------------------------------------------===//
 650
 651 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 652   ((DAGCombiner*)DC)->AddToWorklist(N);
 653 }
 654
 655 SDValue TargetLowering::DAGCombinerInfo::
 656 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 657   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 658 }
 659
 660 SDValue TargetLowering::DAGCombinerInfo::
 661 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 662   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 663 }
 664
 665 SDValue TargetLowering::DAGCombinerInfo::
 666 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 667   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 668 }
 669
 670 void TargetLowering::DAGCombinerInfo::
 671 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 672   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 673 }
 674
 675 //===----------------------------------------------------------------------===//
 676 // Helper Functions
 677 //===----------------------------------------------------------------------===//
 678
 679 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 680   removeFromWorklist(N);
 681
 682   // If the operands of this node are only used by the node, they will now be
 683   // dead. Make sure to re-visit them and recursively delete dead nodes.
 684   for (const SDValue &Op : N->ops())
 685     // For an operand generating multiple values, one of the values may
 686     // become dead allowing further simplification (e.g. split index
 687     // arithmetic from an indexed load).
 688     if (Op->hasOneUse() || Op->getNumValues() > 1)
 689       AddToWorklist(Op.getNode());
 690
 691   DAG.DeleteNode(N);
 692 }
 693
 694 /// Return 1 if we can compute the negated form of the specified expression for
 695 /// the same cost as the expression itself, or 2 if we can compute the negated
 696 /// form more cheaply than the expression itself.
 697 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 698                                const TargetLowering &TLI,
 699                                const TargetOptions *Options,
 700                                unsigned Depth = 0) {
 701   // fneg is removable even if it has multiple uses.
 702   if (Op.getOpcode() == ISD::FNEG) return 2;
 703
 704   // Don't allow anything with multiple uses unless we know it is free.
 705   EVT VT = Op.getValueType();
 706   const SDNodeFlags Flags = Op->getFlags();
 707   if (!Op.hasOneUse())
 708     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
 709           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
 710       return 0;
 711
 712   // Don't recurse exponentially.
 713   if (Depth > 6) return 0;
 714
 715   switch (Op.getOpcode()) {
 716   default: return false;
 717   case ISD::ConstantFP: {
 718     if (!LegalOperations)
 719       return 1;
 720
 721     // Don't invert constant FP values after legalization unless the target says
 722     // the negated constant is legal.
 723     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 724       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
 725   }
 726   case ISD::FADD:
 727     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
 728       return 0;
 729
 730     // After operation legalization, it might not be legal to create new FSUBs.
 731     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
 732       return 0;
 733
 734     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 735     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 736                                     Options, Depth + 1))
 737       return V;
 738     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 739     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 740                               Depth + 1);
 741   case ISD::FSUB:
 742     // We can't turn -(A-B) into B-A when we honor signed zeros.
 743     if (!Options->NoSignedZerosFPMath &&
 744         !Flags.hasNoSignedZeros())
 745       return 0;
 746
 747     // fold (fneg (fsub A, B)) -> (fsub B, A)
 748     return 1;
 749
 750   case ISD::FMUL:
 751   case ISD::FDIV:
 752     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 753     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 754                                     Options, Depth + 1))
 755       return V;
 756
 757     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 758                               Depth + 1);
 759
 760   case ISD::FP_EXTEND:
 761   case ISD::FP_ROUND:
 762   case ISD::FSIN:
 763     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 764                               Depth + 1);
 765   }
 766 }
 767
 768 /// If isNegatibleForFree returns true, return the newly negated expression.
 769 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 770                                     bool LegalOperations, unsigned Depth = 0) {
 771   const TargetOptions &Options = DAG.getTarget().Options;
 772   // fneg is removable even if it has multiple uses.
 773   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
 774
 775   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 776
 777   const SDNodeFlags Flags = Op.getNode()->getFlags();
 778
 779   switch (Op.getOpcode()) {
 780   default: llvm_unreachable("Unknown code");
 781   case ISD::ConstantFP: {
 782     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 783     V.changeSign();
 784     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 785   }
 786   case ISD::FADD:
 787     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
 788
 789     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 790     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 791                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 792       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 793                          GetNegatedExpression(Op.getOperand(0), DAG,
 794                                               LegalOperations, Depth+1),
 795                          Op.getOperand(1), Flags);
 796     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 797     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 798                        GetNegatedExpression(Op.getOperand(1), DAG,
 799                                             LegalOperations, Depth+1),
 800                        Op.getOperand(0), Flags);
 801   case ISD::FSUB:
 802     // fold (fneg (fsub 0, B)) -> B
 803     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
 804       if (N0CFP->isZero())
 805         return Op.getOperand(1);
 806
 807     // fold (fneg (fsub A, B)) -> (fsub B, A)
 808     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 809                        Op.getOperand(1), Op.getOperand(0), Flags);
 810
 811   case ISD::FMUL:
 812   case ISD::FDIV:
 813     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 814     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 815                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 816       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 817                          GetNegatedExpression(Op.getOperand(0), DAG,
 818                                               LegalOperations, Depth+1),
 819                          Op.getOperand(1), Flags);
 820
 821     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 822     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 823                        Op.getOperand(0),
 824                        GetNegatedExpression(Op.getOperand(1), DAG,
 825                                             LegalOperations, Depth+1), Flags);
 826
 827   case ISD::FP_EXTEND:
 828   case ISD::FSIN:
 829     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 830                        GetNegatedExpression(Op.getOperand(0), DAG,
 831                                             LegalOperations, Depth+1));
 832   case ISD::FP_ROUND:
 833       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 834                          GetNegatedExpression(Op.getOperand(0), DAG,
 835                                               LegalOperations, Depth+1),
 836                          Op.getOperand(1));
 837   }
 838 }
 839
 840 // APInts must be the same size for most operations, this helper
 841 // function zero extends the shorter of the pair so that they match.
 842 // We provide an Offset so that we can create bitwidths that won't overflow.
 843 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 844   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 845   LHS = LHS.zextOrSelf(Bits);
 846   RHS = RHS.zextOrSelf(Bits);
 847 }
 848
 849 // Return true if this node is a setcc, or is a select_cc
 850 // that selects between the target values used for true and false, making it
 851 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 852 // the appropriate nodes based on the type of node we are checking. This
 853 // simplifies life a bit for the callers.
 854 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 855                                     SDValue &CC) const {
 856   if (N.getOpcode() == ISD::SETCC) {
 857     LHS = N.getOperand(0);
 858     RHS = N.getOperand(1);
 859     CC  = N.getOperand(2);
 860     return true;
 861   }
 862
 863   if (N.getOpcode() != ISD::SELECT_CC ||
 864       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 865       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 866     return false;
 867
 868   if (TLI.getBooleanContents(N.getValueType()) ==
 869       TargetLowering::UndefinedBooleanContent)
 870     return false;
 871
 872   LHS = N.getOperand(0);
 873   RHS = N.getOperand(1);
 874   CC  = N.getOperand(4);
 875   return true;
 876 }
 877
 878 /// Return true if this is a SetCC-equivalent operation with only one use.
 879 /// If this is true, it allows the users to invert the operation for free when
 880 /// it is profitable to do so.
 881 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 882   SDValue N0, N1, N2;
 883   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 884     return true;
 885   return false;
 886 }
 887
 888 // Returns the SDNode if it is a constant float BuildVector
 889 // or constant float.
 890 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 891   if (isa<ConstantFPSDNode>(N))
 892     return N.getNode();
 893   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 894     return N.getNode();
 895   return nullptr;
 896 }
 897
 898 // Determines if it is a constant integer or a build vector of constant
 899 // integers (and undefs).
 900 // Do not permit build vector implicit truncation.
 901 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 902   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 903     return !(Const->isOpaque() && NoOpaques);
 904   if (N.getOpcode() != ISD::BUILD_VECTOR)
 905     return false;
 906   unsigned BitWidth = N.getScalarValueSizeInBits();
 907   for (const SDValue &Op : N->op_values()) {
 908     if (Op.isUndef())
 909       continue;
 910     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 911     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 912         (Const->isOpaque() && NoOpaques))
 913       return false;
 914   }
 915   return true;
 916 }
 917
 918 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 919 // undef's.
 920 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 921   if (V.getOpcode() != ISD::BUILD_VECTOR)
 922     return false;
 923   return isConstantOrConstantVector(V, NoOpaques) ||
 924          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 925 }
 926
 927 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 928                                     SDValue N1, SDNodeFlags Flags) {
 929   // Don't reassociate reductions.
 930   if (Flags.hasVectorReduction())
 931     return SDValue();
 932
 933   EVT VT = N0.getValueType();
 934   if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
 935     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 936       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 937         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
 938         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
 939           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 940         return SDValue();
 941       }
 942       if (N0.hasOneUse()) {
 943         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
 944         // use
 945         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 946         if (!OpNode.getNode())
 947           return SDValue();
 948         AddToWorklist(OpNode.getNode());
 949         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 950       }
 951     }
 952   }
 953
 954   if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
 955     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 956       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 957         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
 958         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
 959           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
 960         return SDValue();
 961       }
 962       if (N1.hasOneUse()) {
 963         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
 964         // use
 965         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
 966         if (!OpNode.getNode())
 967           return SDValue();
 968         AddToWorklist(OpNode.getNode());
 969         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
 970       }
 971     }
 972   }
 973
 974   return SDValue();
 975 }
 976
 977 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 978                                bool AddTo) {
 979   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 980   ++NodesCombined;
 981   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
 982              To[0].getNode()->dump(&DAG);
 983              dbgs() << " and " << NumTo - 1 << " other values\n");
 984   for (unsigned i = 0, e = NumTo; i != e; ++i)
 985     assert((!To[i].getNode() ||
 986             N->getValueType(i) == To[i].getValueType()) &&
 987            "Cannot combine value to value of different type!");
 988
 989   WorklistRemover DeadNodes(*this);
 990   DAG.ReplaceAllUsesWith(N, To);
 991   if (AddTo) {
 992     // Push the new nodes and any users onto the worklist
 993     for (unsigned i = 0, e = NumTo; i != e; ++i) {
 994       if (To[i].getNode()) {
 995         AddToWorklist(To[i].getNode());
 996         AddUsersToWorklist(To[i].getNode());
 997       }
 998     }
 999   }
1000
1001   // Finally, if the node is now dead, remove it from the graph.  The node
1002   // may not be dead if the replacement process recursively simplified to
1003   // something else needing this node.
1004   if (N->use_empty())
1005     deleteAndRecombine(N);
1006   return SDValue(N, 0);
1007 }
1008
1009 void DAGCombiner::
1010 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1011   // Replace all uses.  If any nodes become isomorphic to other nodes and
1012   // are deleted, make sure to remove them from our worklist.
1013   WorklistRemover DeadNodes(*this);
1014   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1015
1016   // Push the new node and any (possibly new) users onto the worklist.
1017   AddToWorklist(TLO.New.getNode());
1018   AddUsersToWorklist(TLO.New.getNode());
1019
1020   // Finally, if the node is now dead, remove it from the graph.  The node
1021   // may not be dead if the replacement process recursively simplified to
1022   // something else needing this node.
1023   if (TLO.Old.getNode()->use_empty())
1024     deleteAndRecombine(TLO.Old.getNode());
1025 }
1026
1027 /// Check the specified integer node value to see if it can be simplified or if
1028 /// things it uses can be simplified by bit propagation. If so, return true.
1029 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1030   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1031   KnownBits Known;
1032   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1033     return false;
1034
1035   // Revisit the node.
1036   AddToWorklist(Op.getNode());
1037
1038   // Replace the old value with the new one.
1039   ++NodesCombined;
1040   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1041              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1042              dbgs() << '\n');
1043
1044   CommitTargetLoweringOpt(TLO);
1045   return true;
1046 }
1047
1048 /// Check the specified vector node value to see if it can be simplified or
1049 /// if things it uses can be simplified as it only uses some of the elements.
1050 /// If so, return true.
1051 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1052                                              bool AssumeSingleUse) {
1053   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1054   APInt KnownUndef, KnownZero;
1055   if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1056                                       0, AssumeSingleUse))
1057     return false;
1058
1059   // Revisit the node.
1060   AddToWorklist(Op.getNode());
1061
1062   // Replace the old value with the new one.
1063   ++NodesCombined;
1064   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1065              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1066              dbgs() << '\n');
1067
1068   CommitTargetLoweringOpt(TLO);
1069   return true;
1070 }
1071
1072 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1073   SDLoc DL(Load);
1074   EVT VT = Load->getValueType(0);
1075   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1076
1077   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1078              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1079   WorklistRemover DeadNodes(*this);
1080   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1081   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1082   deleteAndRecombine(Load);
1083   AddToWorklist(Trunc.getNode());
1084 }
1085
1086 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1087   Replace = false;
1088   SDLoc DL(Op);
1089   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1090     LoadSDNode *LD = cast<LoadSDNode>(Op);
1091     EVT MemVT = LD->getMemoryVT();
1092     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1093                                                       : LD->getExtensionType();
1094     Replace = true;
1095     return DAG.getExtLoad(ExtType, DL, PVT,
1096                           LD->getChain(), LD->getBasePtr(),
1097                           MemVT, LD->getMemOperand());
1098   }
1099
1100   unsigned Opc = Op.getOpcode();
1101   switch (Opc) {
1102   default: break;
1103   case ISD::AssertSext:
1104     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1105       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1106     break;
1107   case ISD::AssertZext:
1108     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1109       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1110     break;
1111   case ISD::Constant: {
1112     unsigned ExtOpc =
1113       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1114     return DAG.getNode(ExtOpc, DL, PVT, Op);
1115   }
1116   }
1117
1118   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1119     return SDValue();
1120   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1121 }
1122
1123 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1124   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1125     return SDValue();
1126   EVT OldVT = Op.getValueType();
1127   SDLoc DL(Op);
1128   bool Replace = false;
1129   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1130   if (!NewOp.getNode())
1131     return SDValue();
1132   AddToWorklist(NewOp.getNode());
1133
1134   if (Replace)
1135     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1136   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1137                      DAG.getValueType(OldVT));
1138 }
1139
1140 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1141   EVT OldVT = Op.getValueType();
1142   SDLoc DL(Op);
1143   bool Replace = false;
1144   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1145   if (!NewOp.getNode())
1146     return SDValue();
1147   AddToWorklist(NewOp.getNode());
1148
1149   if (Replace)
1150     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1151   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1152 }
1153
1154 /// Promote the specified integer binary operation if the target indicates it is
1155 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1156 /// i32 since i16 instructions are longer.
1157 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1158   if (!LegalOperations)
1159     return SDValue();
1160
1161   EVT VT = Op.getValueType();
1162   if (VT.isVector() || !VT.isInteger())
1163     return SDValue();
1164
1165   // If operation type is 'undesirable', e.g. i16 on x86, consider
1166   // promoting it.
1167   unsigned Opc = Op.getOpcode();
1168   if (TLI.isTypeDesirableForOp(Opc, VT))
1169     return SDValue();
1170
1171   EVT PVT = VT;
1172   // Consult target whether it is a good idea to promote this operation and
1173   // what's the right type to promote it to.
1174   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1175     assert(PVT != VT && "Don't know what type to promote to!");
1176
1177     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1178
1179     bool Replace0 = false;
1180     SDValue N0 = Op.getOperand(0);
1181     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1182
1183     bool Replace1 = false;
1184     SDValue N1 = Op.getOperand(1);
1185     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1186     SDLoc DL(Op);
1187
1188     SDValue RV =
1189         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1190
1191     // We are always replacing N0/N1's use in N and only need
1192     // additional replacements if there are additional uses.
1193     Replace0 &= !N0->hasOneUse();
1194     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1195
1196     // Combine Op here so it is preserved past replacements.
1197     CombineTo(Op.getNode(), RV);
1198
1199     // If operands have a use ordering, make sure we deal with
1200     // predecessor first.
1201     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1202       std::swap(N0, N1);
1203       std::swap(NN0, NN1);
1204     }
1205
1206     if (Replace0) {
1207       AddToWorklist(NN0.getNode());
1208       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1209     }
1210     if (Replace1) {
1211       AddToWorklist(NN1.getNode());
1212       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1213     }
1214     return Op;
1215   }
1216   return SDValue();
1217 }
1218
1219 /// Promote the specified integer shift operation if the target indicates it is
1220 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1221 /// i32 since i16 instructions are longer.
1222 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1223   if (!LegalOperations)
1224     return SDValue();
1225
1226   EVT VT = Op.getValueType();
1227   if (VT.isVector() || !VT.isInteger())
1228     return SDValue();
1229
1230   // If operation type is 'undesirable', e.g. i16 on x86, consider
1231   // promoting it.
1232   unsigned Opc = Op.getOpcode();
1233   if (TLI.isTypeDesirableForOp(Opc, VT))
1234     return SDValue();
1235
1236   EVT PVT = VT;
1237   // Consult target whether it is a good idea to promote this operation and
1238   // what's the right type to promote it to.
1239   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1240     assert(PVT != VT && "Don't know what type to promote to!");
1241
1242     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1243
1244     bool Replace = false;
1245     SDValue N0 = Op.getOperand(0);
1246     SDValue N1 = Op.getOperand(1);
1247     if (Opc == ISD::SRA)
1248       N0 = SExtPromoteOperand(N0, PVT);
1249     else if (Opc == ISD::SRL)
1250       N0 = ZExtPromoteOperand(N0, PVT);
1251     else
1252       N0 = PromoteOperand(N0, PVT, Replace);
1253
1254     if (!N0.getNode())
1255       return SDValue();
1256
1257     SDLoc DL(Op);
1258     SDValue RV =
1259         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1260
1261     AddToWorklist(N0.getNode());
1262     if (Replace)
1263       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1264
1265     // Deal with Op being deleted.
1266     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1267       return RV;
1268   }
1269   return SDValue();
1270 }
1271
1272 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1273   if (!LegalOperations)
1274     return SDValue();
1275
1276   EVT VT = Op.getValueType();
1277   if (VT.isVector() || !VT.isInteger())
1278     return SDValue();
1279
1280   // If operation type is 'undesirable', e.g. i16 on x86, consider
1281   // promoting it.
1282   unsigned Opc = Op.getOpcode();
1283   if (TLI.isTypeDesirableForOp(Opc, VT))
1284     return SDValue();
1285
1286   EVT PVT = VT;
1287   // Consult target whether it is a good idea to promote this operation and
1288   // what's the right type to promote it to.
1289   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1290     assert(PVT != VT && "Don't know what type to promote to!");
1291     // fold (aext (aext x)) -> (aext x)
1292     // fold (aext (zext x)) -> (zext x)
1293     // fold (aext (sext x)) -> (sext x)
1294     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1295     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1296   }
1297   return SDValue();
1298 }
1299
1300 bool DAGCombiner::PromoteLoad(SDValue Op) {
1301   if (!LegalOperations)
1302     return false;
1303
1304   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1305     return false;
1306
1307   EVT VT = Op.getValueType();
1308   if (VT.isVector() || !VT.isInteger())
1309     return false;
1310
1311   // If operation type is 'undesirable', e.g. i16 on x86, consider
1312   // promoting it.
1313   unsigned Opc = Op.getOpcode();
1314   if (TLI.isTypeDesirableForOp(Opc, VT))
1315     return false;
1316
1317   EVT PVT = VT;
1318   // Consult target whether it is a good idea to promote this operation and
1319   // what's the right type to promote it to.
1320   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1321     assert(PVT != VT && "Don't know what type to promote to!");
1322
1323     SDLoc DL(Op);
1324     SDNode *N = Op.getNode();
1325     LoadSDNode *LD = cast<LoadSDNode>(N);
1326     EVT MemVT = LD->getMemoryVT();
1327     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1328                                                       : LD->getExtensionType();
1329     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1330                                    LD->getChain(), LD->getBasePtr(),
1331                                    MemVT, LD->getMemOperand());
1332     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1333
1334     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1335                Result.getNode()->dump(&DAG); dbgs() << '\n');
1336     WorklistRemover DeadNodes(*this);
1337     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1338     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1339     deleteAndRecombine(N);
1340     AddToWorklist(Result.getNode());
1341     return true;
1342   }
1343   return false;
1344 }
1345
1346 /// Recursively delete a node which has no uses and any operands for
1347 /// which it is the only use.
1348 ///
1349 /// Note that this both deletes the nodes and removes them from the worklist.
1350 /// It also adds any nodes who have had a user deleted to the worklist as they
1351 /// may now have only one use and subject to other combines.
1352 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1353   if (!N->use_empty())
1354     return false;
1355
1356   SmallSetVector<SDNode *, 16> Nodes;
1357   Nodes.insert(N);
1358   do {
1359     N = Nodes.pop_back_val();
1360     if (!N)
1361       continue;
1362
1363     if (N->use_empty()) {
1364       for (const SDValue &ChildN : N->op_values())
1365         Nodes.insert(ChildN.getNode());
1366
1367       removeFromWorklist(N);
1368       DAG.DeleteNode(N);
1369     } else {
1370       AddToWorklist(N);
1371     }
1372   } while (!Nodes.empty());
1373   return true;
1374 }
1375
1376 //===----------------------------------------------------------------------===//
1377 //  Main DAG Combiner implementation
1378 //===----------------------------------------------------------------------===//
1379
/// Driver loop of the combiner: seed the worklist with every node in the DAG,
/// then repeatedly pop a node, delete it if dead, optionally re-legalize it,
/// and try to combine it, until the worklist is empty.
///
/// \param AtLevel The combine level to run at; it determines the values of
///                the LegalOperations and LegalTypes members that the visit
///                routines consult.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  // Emptiness is judged by WorklistMap, not by the Worklist vector, because
  // the vector may still hold null placeholders.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    // Every non-null vector entry must have a matching map entry; erase it now
    // that the node has been popped.
    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    // For the rest of this iteration, nodes deleted by the DAG are also
    // removed from the worklist.
    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Nodes touched by legalization, and their users, need another look.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Skip combining N when LegalizeOp reports it is no longer valid.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // An empty result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Same number of results: replace node-for-node. Otherwise N must define
    // a single value, which is replaced by the one value RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1483
/// Generic combine dispatch for \p N.
///
/// Switches on the node's opcode and forwards to the matching visit* routine,
/// returning whatever that routine returns. Several opcodes share one handler
/// (e.g. SADDSAT/UADDSAT, SREM/UREM, SMIN/SMAX/UMIN/UMAX, ROTR/ROTL). An
/// opcode with no case below takes the default path and a null SDValue is
/// returned.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  // No visit routine for this opcode: fall out to the null return below.
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1598
1599 SDValue DAGCombiner::combine(SDNode *N) {
1600   SDValue RV = visit(N);
1601
1602   // If nothing happened, try a target-specific DAG combine.
1603   if (!RV.getNode()) {
1604     assert(N->getOpcode() != ISD::DELETED_NODE &&
1605            "Node was deleted but visit returned NULL!");
1606
1607     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1608         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1609
1610       // Expose the DAG combiner to the target combiner impls.
1611       TargetLowering::DAGCombinerInfo
1612         DagCombineInfo(DAG, Level, false, this);
1613
1614       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1615     }
1616   }
1617
1618   // If nothing happened still, try promoting the operation.
1619   if (!RV.getNode()) {
1620     switch (N->getOpcode()) {
1621     default: break;
1622     case ISD::ADD:
1623     case ISD::SUB:
1624     case ISD::MUL:
1625     case ISD::AND:
1626     case ISD::OR:
1627     case ISD::XOR:
1628       RV = PromoteIntBinOp(SDValue(N, 0));
1629       break;
1630     case ISD::SHL:
1631     case ISD::SRA:
1632     case ISD::SRL:
1633       RV = PromoteIntShiftOp(SDValue(N, 0));
1634       break;
1635     case ISD::SIGN_EXTEND:
1636     case ISD::ZERO_EXTEND:
1637     case ISD::ANY_EXTEND:
1638       RV = PromoteExtend(SDValue(N, 0));
1639       break;
1640     case ISD::LOAD:
1641       if (PromoteLoad(SDValue(N, 0)))
1642         RV = SDValue(N, 0);
1643       break;
1644     }
1645   }
1646
1647   // If N is a commutative binary node, try eliminate it if the commuted
1648   // version is already present in the DAG.
1649   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1650       N->getNumValues() == 1) {
1651     SDValue N0 = N->getOperand(0);
1652     SDValue N1 = N->getOperand(1);
1653
1654     // Constant operands are canonicalized to RHS.
1655     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1656       SDValue Ops[] = {N1, N0};
1657       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1658                                             N->getFlags());
1659       if (CSENode)
1660         return SDValue(CSENode, 0);
1661     }
1662   }
1663
1664   return RV;
1665 }
1666
1667 /// Given a node, return its input chain if it has one, otherwise return a null
1668 /// sd operand.
1669 static SDValue getInputChainForNode(SDNode *N) {
1670   if (unsigned NumOps = N->getNumOperands()) {
1671     if (N->getOperand(0).getValueType() == MVT::Other)
1672       return N->getOperand(0);
1673     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1674       return N->getOperand(NumOps-1);
1675     for (unsigned i = 1; i < NumOps-1; ++i)
1676       if (N->getOperand(i).getValueType() == MVT::Other)
1677         return N->getOperand(i);
1678   }
1679   return SDValue();
1680 }
1681
/// Simplify a TokenFactor node.
///
/// Three things are attempted:
///  1. For a two-operand token factor, if one operand's input chain is the
///     other operand, return the former.
///  2. Flatten single-use TokenFactor operands into this one, dropping
///     EntryToken operands and duplicates.
///  3. Prune operands that are reached by walking up the chain of another
///     operand.
/// Steps 2 and 3 are skipped when OptLevel is CodeGenOpt::None. Returns the
/// replacement chain value, or a null SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // A token factor that has other users, or that is already queued, is
        // kept as an ordinary operand.
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes reached by the walk.
  bool DidPruneOps = false;

  // Seed the search with one worklist entry per operand; the second member of
  // each pair is the index in Ops of the operand the entry belongs to.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: from here on this local lambda hides the one-argument
  // AddToWorklist helper that was called in the flattening loop above.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as coming from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // The pending work of the pruned operand now belongs to OpNumber.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The walk looks at no more than the first 1024 worklist entries.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to be considering at least 2 Ops for pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // Continue the walk through operand 0 of the copy.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory nodes are followed through their chain; any other node ends
      // this branch of the walk.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    // This worklist entry is done; an operand with no work left no longer
    // needs to be considered.
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached while walking up the
        // chain of another operand.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1843
1844 /// MERGE_VALUES can always be eliminated.
1845 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1846   WorklistRemover DeadNodes(*this);
1847   // Replacing results may cause a different MERGE_VALUES to suddenly
1848   // be CSE'd with N, and carry its uses with it. Iterate until no
1849   // uses remain, to ensure that the node can be safely deleted.
1850   // First add the users of this node to the work list so that they
1851   // can be tried again once they have new operands.
1852   AddUsersToWorklist(N);
1853   do {
1854     // Do as a single replacement to avoid rewalking use lists.
1855     SmallVector<SDValue, 8> Ops;
1856     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1857       Ops.push_back(N->getOperand(i));
1858     DAG.ReplaceAllUsesWith(N, Ops.data());
1859   } while (!N->use_empty());
1860   deleteAndRecombine(N);
1861   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1862 }
1863
1864 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1865 /// ConstantSDNode pointer else nullptr.
1866 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1867   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1868   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1869 }
1870
1871 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1872   assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1873
1874   // Don't do this unless the old select is going away. We want to eliminate the
1875   // binary operator, not replace a binop with a select.
1876   // TODO: Handle ISD::SELECT_CC.
1877   unsigned SelOpNo = 0;
1878   SDValue Sel = BO->getOperand(0);
1879   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1880     SelOpNo = 1;
1881     Sel = BO->getOperand(1);
1882   }
1883
1884   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1885     return SDValue();
1886
1887   SDValue CT = Sel.getOperand(1);
1888   if (!isConstantOrConstantVector(CT, true) &&
1889       !isConstantFPBuildVectorOrConstantFP(CT))
1890     return SDValue();
1891
1892   SDValue CF = Sel.getOperand(2);
1893   if (!isConstantOrConstantVector(CF, true) &&
1894       !isConstantFPBuildVectorOrConstantFP(CF))
1895     return SDValue();
1896
1897   // Bail out if any constants are opaque because we can't constant fold those.
1898   // The exception is "and" and "or" with either 0 or -1 in which case we can
1899   // propagate non constant operands into select. I.e.:
1900   // and (select Cond, 0, -1), X --> select Cond, 0, X
1901   // or X, (select Cond, -1, 0) --> select Cond, -1, X
1902   auto BinOpcode = BO->getOpcode();
1903   bool CanFoldNonConst =
1904       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1905       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1906       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1907
1908   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1909   if (!CanFoldNonConst &&
1910       !isConstantOrConstantVector(CBO, true) &&
1911       !isConstantFPBuildVectorOrConstantFP(CBO))
1912     return SDValue();
1913
1914   EVT VT = Sel.getValueType();
1915
1916   // In case of shift value and shift amount may have different VT. For instance
1917   // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1918   // swapped operands and value types do not match. NB: x86 is fine if operands
1919   // are not swapped with shift amount VT being not bigger than shifted value.
1920   // TODO: that is possible to check for a shift operation, correct VTs and
1921   // still perform optimization on x86 if needed.
1922   if (SelOpNo && VT != CBO.getValueType())
1923     return SDValue();
1924
1925   // We have a select-of-constants followed by a binary operator with a
1926   // constant. Eliminate the binop by pulling the constant math into the select.
1927   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1928   SDLoc DL(Sel);
1929   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1930                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1931   if (!CanFoldNonConst && !NewCT.isUndef() &&
1932       !isConstantOrConstantVector(NewCT, true) &&
1933       !isConstantFPBuildVectorOrConstantFP(NewCT))
1934     return SDValue();
1935
1936   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1937                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1938   if (!CanFoldNonConst && !NewCF.isUndef() &&
1939       !isConstantOrConstantVector(NewCF, true) &&
1940       !isConstantFPBuildVectorOrConstantFP(NewCF))
1941     return SDValue();
1942
1943   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1944 }
1945
1946 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1947   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1948          "Expecting add or sub");
1949
1950   // Match a constant operand and a zext operand for the math instruction:
1951   // add Z, C
1952   // sub C, Z
1953   bool IsAdd = N->getOpcode() == ISD::ADD;
1954   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1955   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1956   auto *CN = dyn_cast<ConstantSDNode>(C);
1957   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1958     return SDValue();
1959
1960   // Match the zext operand as a setcc of a boolean.
1961   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1962       Z.getOperand(0).getValueType() != MVT::i1)
1963     return SDValue();
1964
1965   // Match the compare as: setcc (X & 1), 0, eq.
1966   SDValue SetCC = Z.getOperand(0);
1967   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1968   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1969       SetCC.getOperand(0).getOpcode() != ISD::AND ||
1970       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1971     return SDValue();
1972
1973   // We are adding/subtracting a constant and an inverted low bit. Turn that
1974   // into a subtract/add of the low bit with incremented/decremented constant:
1975   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1976   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1977   EVT VT = C.getValueType();
1978   SDLoc DL(N);
1979   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1980   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1981                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1982   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1983 }
1984
1985 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1986 /// a shift and add with a different constant.
1987 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1988   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1989          "Expecting add or sub");
1990
1991   // We need a constant operand for the add/sub, and the other operand is a
1992   // logical shift right: add (srl), C or sub C, (srl).
1993   bool IsAdd = N->getOpcode() == ISD::ADD;
1994   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1995   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1996   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1997   if (!C || ShiftOp.getOpcode() != ISD::SRL)
1998     return SDValue();
1999
2000   // The shift must be of a 'not' value.
2001   SDValue Not = ShiftOp.getOperand(0);
2002   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2003     return SDValue();
2004
2005   // The shift must be moving the sign bit to the least-significant-bit.
2006   EVT VT = ShiftOp.getValueType();
2007   SDValue ShAmt = ShiftOp.getOperand(1);
2008   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2009   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2010     return SDValue();
2011
2012   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2013   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2014   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2015   SDLoc DL(N);
2016   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2017   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2018   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2019   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2020 }
2021
/// Try the generic folds for an ISD::ADD node.
///
/// The folds below are attempted in order and the first one that applies
/// determines the result. Returns the replacement value, SDValue(N, 0) when
/// SimplifyDemandedBits reports a change, or a null SDValue when nothing
/// matched.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // Folds that need a non-opaque constant (or constant vector) on the RHS.
  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      // Once operations must be legal, both the XOR and the ZERO_EXTEND have
      // to be legal; in all cases X must have 1-bit scalar elements.
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // Returning N itself reports that SimplifyDemandedBits made a change.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // Try the remaining add-like folds with the operands in both orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2178
2179 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2180   unsigned Opcode = N->getOpcode();
2181   SDValue N0 = N->getOperand(0);
2182   SDValue N1 = N->getOperand(1);
2183   EVT VT = N0.getValueType();
2184   SDLoc DL(N);
2185
2186   // fold vector ops
2187   if (VT.isVector()) {
2188     // TODO SimplifyVBinOp
2189
2190     // fold (add_sat x, 0) -> x, vector edition
2191     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2192       return N0;
2193     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2194       return N1;
2195   }
2196
2197   // fold (add_sat x, undef) -> -1
2198   if (N0.isUndef() || N1.isUndef())
2199     return DAG.getAllOnesConstant(DL, VT);
2200
2201   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2202     // canonicalize constant to RHS
2203     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2204       return DAG.getNode(Opcode, DL, VT, N1, N0);
2205     // fold (add_sat c1, c2) -> c3
2206     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2207                                       N1.getNode());
2208   }
2209
2210   // fold (add_sat x, 0) -> x
2211   if (isNullConstant(N1))
2212     return N0;
2213
2214   // If it cannot overflow, transform into an add.
2215   if (Opcode == ISD::UADDSAT)
2216     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2217       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2218
2219   return SDValue();
2220 }
2221
2222 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2223   bool Masked = false;
2224
2225   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2226   while (true) {
2227     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2228       V = V.getOperand(0);
2229       continue;
2230     }
2231
2232     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2233       Masked = true;
2234       V = V.getOperand(0);
2235       continue;
2236     }
2237
2238     break;
2239   }
2240
2241   // If this is not a carry, return.
2242   if (V.getResNo() != 1)
2243     return SDValue();
2244
2245   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2246       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2247     return SDValue();
2248
2249   // If the result is masked, then no matter what kind of bool it is we can
2250   // return. If it isn't, then we need to make sure the bool type is either 0 or
2251   // 1 and not other values.
2252   if (Masked ||
2253       TLI.getBooleanContents(V.getValueType()) ==
2254           TargetLoweringBase::ZeroOrOneBooleanContent)
2255     return V;
2256
2257   return SDValue();
2258 }
2259
2260 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2261   EVT VT = N0.getValueType();
2262   SDLoc DL(LocReference);
2263
2264   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2265   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2266       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2267     return DAG.getNode(ISD::SUB, DL, VT, N0,
2268                        DAG.getNode(ISD::SHL, DL, VT,
2269                                    N1.getOperand(0).getOperand(1),
2270                                    N1.getOperand(1)));
2271
2272   if (N1.getOpcode() == ISD::AND) {
2273     SDValue AndOp0 = N1.getOperand(0);
2274     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2275     unsigned DestBits = VT.getScalarSizeInBits();
2276
2277     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2278     // and similar xforms where the inner op is either ~0 or 0.
2279     if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
2280       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2281   }
2282
2283   // add (sext i1), X -> sub X, (zext i1)
2284   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2285       N0.getOperand(0).getValueType() == MVT::i1 &&
2286       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2287     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2288     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2289   }
2290
2291   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2292   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2293     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2294     if (TN->getVT() == MVT::i1) {
2295       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2296                                  DAG.getConstant(1, DL, VT));
2297       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2298     }
2299   }
2300
2301   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2302   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2303       N1.getResNo() == 0)
2304     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2305                        N0, N1.getOperand(0), N1.getOperand(2));
2306
2307   // (add X, Carry) -> (addcarry X, 0, Carry)
2308   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2309     if (SDValue Carry = getAsCarry(TLI, N1))
2310       return DAG.getNode(ISD::ADDCARRY, DL,
2311                          DAG.getVTList(VT, Carry.getValueType()), N0,
2312                          DAG.getConstant(0, DL, VT), Carry);
2313
2314   return SDValue();
2315 }
2316
2317 SDValue DAGCombiner::visitADDC(SDNode *N) {
2318   SDValue N0 = N->getOperand(0);
2319   SDValue N1 = N->getOperand(1);
2320   EVT VT = N0.getValueType();
2321   SDLoc DL(N);
2322
2323   // If the flag result is dead, turn this into an ADD.
2324   if (!N->hasAnyUseOfValue(1))
2325     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2326                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2327
2328   // canonicalize constant to RHS.
2329   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2330   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2331   if (N0C && !N1C)
2332     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2333
2334   // fold (addc x, 0) -> x + no carry out
2335   if (isNullConstant(N1))
2336     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2337                                         DL, MVT::Glue));
2338
2339   // If it cannot overflow, transform into an add.
2340   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2341     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2342                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2343
2344   return SDValue();
2345 }
2346
2347 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2348                            SelectionDAG &DAG, const TargetLowering &TLI) {
2349   SDValue Cst;
2350   switch (TLI.getBooleanContents(VT)) {
2351   case TargetLowering::ZeroOrOneBooleanContent:
2352   case TargetLowering::UndefinedBooleanContent:
2353     Cst = DAG.getConstant(1, DL, VT);
2354     break;
2355   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2356     Cst = DAG.getConstant(-1, DL, VT);
2357     break;
2358   }
2359
2360   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2361 }
2362
2363 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2364   if (V.getOpcode() != ISD::XOR) return false;
2365   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2366   if (!Const) return false;
2367
2368   switch(TLI.getBooleanContents(VT)) {
2369     case TargetLowering::ZeroOrOneBooleanContent:
2370       return Const->isOne();
2371     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2372       return Const->isAllOnesValue();
2373     case TargetLowering::UndefinedBooleanContent:
2374       return (Const->getAPIntValue() & 0x01) == 1;
2375   }
2376   llvm_unreachable("Unsupported boolean content");
2377 }
2378
2379 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2380   SDValue N0 = N->getOperand(0);
2381   SDValue N1 = N->getOperand(1);
2382   EVT VT = N0.getValueType();
2383   if (VT.isVector())
2384     return SDValue();
2385
2386   EVT CarryVT = N->getValueType(1);
2387   SDLoc DL(N);
2388
2389   // If the flag result is dead, turn this into an ADD.
2390   if (!N->hasAnyUseOfValue(1))
2391     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2392                      DAG.getUNDEF(CarryVT));
2393
2394   // canonicalize constant to RHS.
2395   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2396   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2397   if (N0C && !N1C)
2398     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2399
2400   // fold (uaddo x, 0) -> x + no carry out
2401   if (isNullConstant(N1))
2402     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2403
2404   // If it cannot overflow, transform into an add.
2405   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2406     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2407                      DAG.getConstant(0, DL, CarryVT));
2408
2409   // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2410   if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2411     SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2412                               DAG.getConstant(0, DL, VT),
2413                               N0.getOperand(0));
2414     return CombineTo(N, Sub,
2415                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2416   }
2417
2418   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2419     return Combined;
2420
2421   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2422     return Combined;
2423
2424   return SDValue();
2425 }
2426
2427 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2428   auto VT = N0.getValueType();
2429
2430   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2431   // If Y + 1 cannot overflow.
2432   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2433     SDValue Y = N1.getOperand(0);
2434     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2435     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2436       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2437                          N1.getOperand(2));
2438   }
2439
2440   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2441   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2442     if (SDValue Carry = getAsCarry(TLI, N1))
2443       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2444                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2445
2446   return SDValue();
2447 }
2448
2449 SDValue DAGCombiner::visitADDE(SDNode *N) {
2450   SDValue N0 = N->getOperand(0);
2451   SDValue N1 = N->getOperand(1);
2452   SDValue CarryIn = N->getOperand(2);
2453
2454   // canonicalize constant to RHS
2455   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2456   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2457   if (N0C && !N1C)
2458     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2459                        N1, N0, CarryIn);
2460
2461   // fold (adde x, y, false) -> (addc x, y)
2462   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2463     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2464
2465   return SDValue();
2466 }
2467
2468 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2469   SDValue N0 = N->getOperand(0);
2470   SDValue N1 = N->getOperand(1);
2471   SDValue CarryIn = N->getOperand(2);
2472   SDLoc DL(N);
2473
2474   // canonicalize constant to RHS
2475   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2476   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2477   if (N0C && !N1C)
2478     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2479
2480   // fold (addcarry x, y, false) -> (uaddo x, y)
2481   if (isNullConstant(CarryIn)) {
2482     if (!LegalOperations ||
2483         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2484       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2485   }
2486
2487   EVT CarryVT = CarryIn.getValueType();
2488
2489   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2490   if (isNullConstant(N0) && isNullConstant(N1)) {
2491     EVT VT = N0.getValueType();
2492     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2493     AddToWorklist(CarryExt.getNode());
2494     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2495                                     DAG.getConstant(1, DL, VT)),
2496                      DAG.getConstant(0, DL, CarryVT));
2497   }
2498
2499   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2500   if (isBitwiseNot(N0) && isNullConstant(N1) &&
2501       isBooleanFlip(CarryIn, CarryVT, TLI)) {
2502     SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2503                               DAG.getConstant(0, DL, N0.getValueType()),
2504                               N0.getOperand(0), CarryIn.getOperand(0));
2505     return CombineTo(N, Sub,
2506                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2507   }
2508
2509   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2510     return Combined;
2511
2512   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2513     return Combined;
2514
2515   return SDValue();
2516 }
2517
2518 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2519                                        SDNode *N) {
2520   // Iff the flag result is dead:
2521   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2522   if ((N0.getOpcode() == ISD::ADD ||
2523        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2524       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2525     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2526                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2527
2528   /**
2529    * When one of the addcarry argument is itself a carry, we may be facing
2530    * a diamond carry propagation. In which case we try to transform the DAG
2531    * to ensure linear carry propagation if that is possible.
2532    *
2533    * We are trying to get:
2534    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2535    */
2536   if (auto Y = getAsCarry(TLI, N1)) {
2537     /**
2538      *            (uaddo A, B)
2539      *             /       \
2540      *          Carry      Sum
2541      *            |          \
2542      *            | (addcarry *, 0, Z)
2543      *            |       /
2544      *             \   Carry
2545      *              |   /
2546      * (addcarry X, *, *)
2547      */
2548     if (Y.getOpcode() == ISD::UADDO &&
2549         CarryIn.getResNo() == 1 &&
2550         CarryIn.getOpcode() == ISD::ADDCARRY &&
2551         isNullConstant(CarryIn.getOperand(1)) &&
2552         CarryIn.getOperand(0) == Y.getValue(0)) {
2553       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2554                               Y.getOperand(0), Y.getOperand(1),
2555                               CarryIn.getOperand(2));
2556       AddToWorklist(NewY.getNode());
2557       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2558                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2559                          NewY.getValue(1));
2560     }
2561   }
2562
2563   return SDValue();
2564 }
2565
2566 // Since it may not be valid to emit a fold to zero for vector initializers
2567 // check if we can before folding.
2568 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2569                              SelectionDAG &DAG, bool LegalOperations) {
2570   if (!VT.isVector())
2571     return DAG.getConstant(0, DL, VT);
2572   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2573     return DAG.getConstant(0, DL, VT);
2574   return SDValue();
2575 }
2576
2577 SDValue DAGCombiner::visitSUB(SDNode *N) {
2578   SDValue N0 = N->getOperand(0);
2579   SDValue N1 = N->getOperand(1);
2580   EVT VT = N0.getValueType();
2581   SDLoc DL(N);
2582
2583   // fold vector ops
2584   if (VT.isVector()) {
2585     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2586       return FoldedVOp;
2587
2588     // fold (sub x, 0) -> x, vector edition
2589     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2590       return N0;
2591   }
2592
2593   // fold (sub x, x) -> 0
2594   // FIXME: Refactor this and xor and other similar operations together.
2595   if (N0 == N1)
2596     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2597   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2598       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2599     // fold (sub c1, c2) -> c1-c2
2600     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2601                                       N1.getNode());
2602   }
2603
2604   if (SDValue NewSel = foldBinOpIntoSelect(N))
2605     return NewSel;
2606
2607   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2608
2609   // fold (sub x, c) -> (add x, -c)
2610   if (N1C) {
2611     return DAG.getNode(ISD::ADD, DL, VT, N0,
2612                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2613   }
2614
2615   if (isNullOrNullSplat(N0)) {
2616     unsigned BitWidth = VT.getScalarSizeInBits();
2617     // Right-shifting everything out but the sign bit followed by negation is
2618     // the same as flipping arithmetic/logical shift type without the negation:
2619     // -(X >>u 31) -> (X >>s 31)
2620     // -(X >>s 31) -> (X >>u 31)
2621     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2622       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2623       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2624         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2625         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2626           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2627       }
2628     }
2629
2630     // 0 - X --> 0 if the sub is NUW.
2631     if (N->getFlags().hasNoUnsignedWrap())
2632       return N0;
2633
2634     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2635       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2636       // N1 must be 0 because negating the minimum signed value is undefined.
2637       if (N->getFlags().hasNoSignedWrap())
2638         return N0;
2639
2640       // 0 - X --> X if X is 0 or the minimum signed value.
2641       return N1;
2642     }
2643   }
2644
2645   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2646   if (isAllOnesOrAllOnesSplat(N0))
2647     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2648
2649   // fold (A - (0-B)) -> A+B
2650   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2651     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2652
2653   // fold A-(A-B) -> B
2654   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2655     return N1.getOperand(1);
2656
2657   // fold (A+B)-A -> B
2658   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2659     return N0.getOperand(1);
2660
2661   // fold (A+B)-B -> A
2662   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2663     return N0.getOperand(0);
2664
2665   // fold C2-(A+C1) -> (C2-C1)-A
2666   if (N1.getOpcode() == ISD::ADD) {
2667     SDValue N11 = N1.getOperand(1);
2668     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2669         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2670       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2671       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2672     }
2673   }
2674
2675   // fold ((A+(B+or-C))-B) -> A+or-C
2676   if (N0.getOpcode() == ISD::ADD &&
2677       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2678        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2679       N0.getOperand(1).getOperand(0) == N1)
2680     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2681                        N0.getOperand(1).getOperand(1));
2682
2683   // fold ((A+(C+B))-B) -> A+C
2684   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2685       N0.getOperand(1).getOperand(1) == N1)
2686     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2687                        N0.getOperand(1).getOperand(0));
2688
2689   // fold ((A-(B-C))-C) -> A-B
2690   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2691       N0.getOperand(1).getOperand(1) == N1)
2692     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2693                        N0.getOperand(1).getOperand(0));
2694
2695   // fold (A-(B-C)) -> A+(C-B)
2696   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2697     return DAG.getNode(ISD::ADD, DL, VT, N0,
2698                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2699                                    N1.getOperand(0)));
2700
2701   // fold (X - (-Y * Z)) -> (X + (Y * Z))
2702   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2703     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2704         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2705       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2706                                 N1.getOperand(0).getOperand(1),
2707                                 N1.getOperand(1));
2708       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2709     }
2710     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2711         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2712       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2713                                 N1.getOperand(0),
2714                                 N1.getOperand(1).getOperand(1));
2715       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2716     }
2717   }
2718
2719   // If either operand of a sub is undef, the result is undef
2720   if (N0.isUndef())
2721     return N0;
2722   if (N1.isUndef())
2723     return N1;
2724
2725   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2726     return V;
2727
2728   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2729     return V;
2730
2731   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2732   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2733     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2734       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2735       SDValue S0 = N1.getOperand(0);
2736       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2737         unsigned OpSizeInBits = VT.getScalarSizeInBits();
2738         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2739           if (C->getAPIntValue() == (OpSizeInBits - 1))
2740             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2741       }
2742     }
2743   }
2744
2745   // If the relocation model supports it, consider symbol offsets.
2746   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2747     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2748       // fold (sub Sym, c) -> Sym-c
2749       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2750         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2751                                     GA->getOffset() -
2752                                         (uint64_t)N1C->getSExtValue());
2753       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2754       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2755         if (GA->getGlobal() == GB->getGlobal())
2756           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2757                                  DL, VT);
2758     }
2759
2760   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2761   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2762     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2763     if (TN->getVT() == MVT::i1) {
2764       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2765                                  DAG.getConstant(1, DL, VT));
2766       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2767     }
2768   }
2769
2770   // Prefer an add for more folding potential and possibly better codegen:
2771   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2772   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2773     SDValue ShAmt = N1.getOperand(1);
2774     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2775     if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2776       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2777       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2778     }
2779   }
2780
2781   return SDValue();
2782 }
2783
2784 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2785   SDValue N0 = N->getOperand(0);
2786   SDValue N1 = N->getOperand(1);
2787   EVT VT = N0.getValueType();
2788   SDLoc DL(N);
2789
2790   // fold vector ops
2791   if (VT.isVector()) {
2792     // TODO SimplifyVBinOp
2793
2794     // fold (sub_sat x, 0) -> x, vector edition
2795     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2796       return N0;
2797   }
2798
2799   // fold (sub_sat x, undef) -> 0
2800   if (N0.isUndef() || N1.isUndef())
2801     return DAG.getConstant(0, DL, VT);
2802
2803   // fold (sub_sat x, x) -> 0
2804   if (N0 == N1)
2805     return DAG.getConstant(0, DL, VT);
2806
2807   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2808       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2809     // fold (sub_sat c1, c2) -> c3
2810     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2811                                       N1.getNode());
2812   }
2813
2814   // fold (sub_sat x, 0) -> x
2815   if (isNullConstant(N1))
2816     return N0;
2817
2818   return SDValue();
2819 }
2820
2821 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2822   SDValue N0 = N->getOperand(0);
2823   SDValue N1 = N->getOperand(1);
2824   EVT VT = N0.getValueType();
2825   SDLoc DL(N);
2826
2827   // If the flag result is dead, turn this into an SUB.
2828   if (!N->hasAnyUseOfValue(1))
2829     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2830                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2831
2832   // fold (subc x, x) -> 0 + no borrow
2833   if (N0 == N1)
2834     return CombineTo(N, DAG.getConstant(0, DL, VT),
2835                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2836
2837   // fold (subc x, 0) -> x + no borrow
2838   if (isNullConstant(N1))
2839     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2840
2841   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2842   if (isAllOnesConstant(N0))
2843     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2844                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2845
2846   return SDValue();
2847 }
2848
2849 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2850   SDValue N0 = N->getOperand(0);
2851   SDValue N1 = N->getOperand(1);
2852   EVT VT = N0.getValueType();
2853   if (VT.isVector())
2854     return SDValue();
2855
2856   EVT CarryVT = N->getValueType(1);
2857   SDLoc DL(N);
2858
2859   // If the flag result is dead, turn this into an SUB.
2860   if (!N->hasAnyUseOfValue(1))
2861     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2862                      DAG.getUNDEF(CarryVT));
2863
2864   // fold (usubo x, x) -> 0 + no borrow
2865   if (N0 == N1)
2866     return CombineTo(N, DAG.getConstant(0, DL, VT),
2867                      DAG.getConstant(0, DL, CarryVT));
2868
2869   // fold (usubo x, 0) -> x + no borrow
2870   if (isNullConstant(N1))
2871     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2872
2873   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2874   if (isAllOnesConstant(N0))
2875     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2876                      DAG.getConstant(0, DL, CarryVT));
2877
2878   return SDValue();
2879 }
2880
2881 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2882   SDValue N0 = N->getOperand(0);
2883   SDValue N1 = N->getOperand(1);
2884   SDValue CarryIn = N->getOperand(2);
2885
2886   // fold (sube x, y, false) -> (subc x, y)
2887   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2888     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2889
2890   return SDValue();
2891 }
2892
2893 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2894   SDValue N0 = N->getOperand(0);
2895   SDValue N1 = N->getOperand(1);
2896   SDValue CarryIn = N->getOperand(2);
2897
2898   // fold (subcarry x, y, false) -> (usubo x, y)
2899   if (isNullConstant(CarryIn)) {
2900     if (!LegalOperations ||
2901         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2902       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2903   }
2904
2905   return SDValue();
2906 }
2907
2908 SDValue DAGCombiner::visitMUL(SDNode *N) {
2909   SDValue N0 = N->getOperand(0);
2910   SDValue N1 = N->getOperand(1);
2911   EVT VT = N0.getValueType();
2912
2913   // fold (mul x, undef) -> 0
2914   if (N0.isUndef() || N1.isUndef())
2915     return DAG.getConstant(0, SDLoc(N), VT);
2916
2917   bool N0IsConst = false;
2918   bool N1IsConst = false;
2919   bool N1IsOpaqueConst = false;
2920   bool N0IsOpaqueConst = false;
2921   APInt ConstValue0, ConstValue1;
2922   // fold vector ops
2923   if (VT.isVector()) {
2924     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2925       return FoldedVOp;
2926
2927     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2928     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2929     assert((!N0IsConst ||
2930             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2931            "Splat APInt should be element width");
2932     assert((!N1IsConst ||
2933             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2934            "Splat APInt should be element width");
2935   } else {
2936     N0IsConst = isa<ConstantSDNode>(N0);
2937     if (N0IsConst) {
2938       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2939       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2940     }
2941     N1IsConst = isa<ConstantSDNode>(N1);
2942     if (N1IsConst) {
2943       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2944       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2945     }
2946   }
2947
2948   // fold (mul c1, c2) -> c1*c2
2949   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2950     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2951                                       N0.getNode(), N1.getNode());
2952
2953   // canonicalize constant to RHS (vector doesn't have to splat)
2954   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2955      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2956     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2957   // fold (mul x, 0) -> 0
2958   if (N1IsConst && ConstValue1.isNullValue())
2959     return N1;
2960   // fold (mul x, 1) -> x
2961   if (N1IsConst && ConstValue1.isOneValue())
2962     return N0;
2963
2964   if (SDValue NewSel = foldBinOpIntoSelect(N))
2965     return NewSel;
2966
2967   // fold (mul x, -1) -> 0-x
2968   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2969     SDLoc DL(N);
2970     return DAG.getNode(ISD::SUB, DL, VT,
2971                        DAG.getConstant(0, DL, VT), N0);
2972   }
2973   // fold (mul x, (1 << c)) -> x << c
2974   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2975       DAG.isKnownToBeAPowerOfTwo(N1) &&
2976       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2977     SDLoc DL(N);
2978     SDValue LogBase2 = BuildLogBase2(N1, DL);
2979     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2980     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2981     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2982   }
2983   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2984   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2985     unsigned Log2Val = (-ConstValue1).logBase2();
2986     SDLoc DL(N);
2987     // FIXME: If the input is something that is easily negated (e.g. a
2988     // single-use add), we should put the negate there.
2989     return DAG.getNode(ISD::SUB, DL, VT,
2990                        DAG.getConstant(0, DL, VT),
2991                        DAG.getNode(ISD::SHL, DL, VT, N0,
2992                             DAG.getConstant(Log2Val, DL,
2993                                       getShiftAmountTy(N0.getValueType()))));
2994   }
2995
2996   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
2997   // mul x, (2^N + 1) --> add (shl x, N), x
2998   // mul x, (2^N - 1) --> sub (shl x, N), x
2999   // Examples: x * 33 --> (x << 5) + x
3000   //           x * 15 --> (x << 4) - x
3001   //           x * -33 --> -((x << 5) + x)
3002   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3003   if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3004     // TODO: We could handle more general decomposition of any constant by
3005     //       having the target set a limit on number of ops and making a
3006     //       callback to determine that sequence (similar to sqrt expansion).
3007     unsigned MathOp = ISD::DELETED_NODE;
3008     APInt MulC = ConstValue1.abs();
3009     if ((MulC - 1).isPowerOf2())
3010       MathOp = ISD::ADD;
3011     else if ((MulC + 1).isPowerOf2())
3012       MathOp = ISD::SUB;
3013
3014     if (MathOp != ISD::DELETED_NODE) {
3015       unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3016                                           : (MulC + 1).logBase2();
3017       assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3018              "Not expecting multiply-by-constant that could have simplified");
3019       SDLoc DL(N);
3020       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3021                                 DAG.getConstant(ShAmt, DL, VT));
3022       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3023       if (ConstValue1.isNegative())
3024         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3025       return R;
3026     }
3027   }
3028
3029   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3030   if (N0.getOpcode() == ISD::SHL &&
3031       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3032       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3033     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3034     if (isConstantOrConstantVector(C3))
3035       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3036   }
3037
3038   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3039   // use.
3040   {
3041     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3042
3043     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3044     if (N0.getOpcode() == ISD::SHL &&
3045         isConstantOrConstantVector(N0.getOperand(1)) &&
3046         N0.getNode()->hasOneUse()) {
3047       Sh = N0; Y = N1;
3048     } else if (N1.getOpcode() == ISD::SHL &&
3049                isConstantOrConstantVector(N1.getOperand(1)) &&
3050                N1.getNode()->hasOneUse()) {
3051       Sh = N1; Y = N0;
3052     }
3053
3054     if (Sh.getNode()) {
3055       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3056       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3057     }
3058   }
3059
3060   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3061   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3062       N0.getOpcode() == ISD::ADD &&
3063       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3064       isMulAddWithConstProfitable(N, N0, N1))
3065       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3066                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3067                                      N0.getOperand(0), N1),
3068                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3069                                      N0.getOperand(1), N1));
3070
3071   // reassociate mul
3072   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3073     return RMUL;
3074
3075   return SDValue();
3076 }
3077
3078 /// Return true if divmod libcall is available.
3079 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3080                                      const TargetLowering &TLI) {
3081   RTLIB::Libcall LC;
3082   EVT NodeType = Node->getValueType(0);
3083   if (!NodeType.isSimple())
3084     return false;
3085   switch (NodeType.getSimpleVT().SimpleTy) {
3086   default: return false; // No libcall for vector types.
3087   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3088   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3089   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3090   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3091   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3092   }
3093
3094   return TLI.getLibcallName(LC) != nullptr;
3095 }
3096
3097 /// Issue divrem if both quotient and remainder are needed.
3098 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3099   if (Node->use_empty())
3100     return SDValue(); // This is a dead node, leave it alone.
3101
3102   unsigned Opcode = Node->getOpcode();
3103   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3104   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3105
3106   // DivMod lib calls can still work on non-legal types if using lib-calls.
3107   EVT VT = Node->getValueType(0);
3108   if (VT.isVector() || !VT.isInteger())
3109     return SDValue();
3110
3111   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3112     return SDValue();
3113
3114   // If DIVREM is going to get expanded into a libcall,
3115   // but there is no libcall available, then don't combine.
3116   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3117       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3118     return SDValue();
3119
3120   // If div is legal, it's better to do the normal expansion
3121   unsigned OtherOpcode = 0;
3122   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3123     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3124     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3125       return SDValue();
3126   } else {
3127     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3128     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3129       return SDValue();
3130   }
3131
3132   SDValue Op0 = Node->getOperand(0);
3133   SDValue Op1 = Node->getOperand(1);
3134   SDValue combined;
3135   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3136          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3137     SDNode *User = *UI;
3138     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3139         User->use_empty())
3140       continue;
3141     // Convert the other matching node(s), too;
3142     // otherwise, the DIVREM may get target-legalized into something
3143     // target-specific that we won't be able to recognize.
3144     unsigned UserOpc = User->getOpcode();
3145     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3146         User->getOperand(0) == Op0 &&
3147         User->getOperand(1) == Op1) {
3148       if (!combined) {
3149         if (UserOpc == OtherOpcode) {
3150           SDVTList VTs = DAG.getVTList(VT, VT);
3151           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3152         } else if (UserOpc == DivRemOpc) {
3153           combined = SDValue(User, 0);
3154         } else {
3155           assert(UserOpc == Opcode);
3156           continue;
3157         }
3158       }
3159       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3160         CombineTo(User, combined);
3161       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3162         CombineTo(User, combined.getValue(1));
3163     }
3164   }
3165   return combined;
3166 }
3167
3168 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3169   SDValue N0 = N->getOperand(0);
3170   SDValue N1 = N->getOperand(1);
3171   EVT VT = N->getValueType(0);
3172   SDLoc DL(N);
3173
3174   unsigned Opc = N->getOpcode();
3175   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3176   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3177
3178   // X / undef -> undef
3179   // X % undef -> undef
3180   // X / 0 -> undef
3181   // X % 0 -> undef
3182   // NOTE: This includes vectors where any divisor element is zero/undef.
3183   if (DAG.isUndef(Opc, {N0, N1}))
3184     return DAG.getUNDEF(VT);
3185
3186   // undef / X -> 0
3187   // undef % X -> 0
3188   if (N0.isUndef())
3189     return DAG.getConstant(0, DL, VT);
3190
3191   // 0 / X -> 0
3192   // 0 % X -> 0
3193   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3194   if (N0C && N0C->isNullValue())
3195     return N0;
3196
3197   // X / X -> 1
3198   // X % X -> 0
3199   if (N0 == N1)
3200     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3201
3202   // X / 1 -> X
3203   // X % 1 -> 0
3204   // If this is a boolean op (single-bit element type), we can't have
3205   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3206   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3207   // it's a 1.
3208   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3209     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3210
3211   return SDValue();
3212 }
3213
3214 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3215   SDValue N0 = N->getOperand(0);
3216   SDValue N1 = N->getOperand(1);
3217   EVT VT = N->getValueType(0);
3218   EVT CCVT = getSetCCResultType(VT);
3219
3220   // fold vector ops
3221   if (VT.isVector())
3222     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3223       return FoldedVOp;
3224
3225   SDLoc DL(N);
3226
3227   // fold (sdiv c1, c2) -> c1/c2
3228   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3229   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3230   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3231     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3232   // fold (sdiv X, -1) -> 0-X
3233   if (N1C && N1C->isAllOnesValue())
3234     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3235   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3236   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3237     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3238                          DAG.getConstant(1, DL, VT),
3239                          DAG.getConstant(0, DL, VT));
3240
3241   if (SDValue V = simplifyDivRem(N, DAG))
3242     return V;
3243
3244   if (SDValue NewSel = foldBinOpIntoSelect(N))
3245     return NewSel;
3246
3247   // If we know the sign bits of both operands are zero, strength reduce to a
3248   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3249   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3250     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3251
3252   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3253     // If the corresponding remainder node exists, update its users with
3254     // (Dividend - (Quotient * Divisor).
3255     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3256                                               { N0, N1 })) {
3257       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3258       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3259       AddToWorklist(Mul.getNode());
3260       AddToWorklist(Sub.getNode());
3261       CombineTo(RemNode, Sub);
3262     }
3263     return V;
3264   }
3265
3266   // sdiv, srem -> sdivrem
3267   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3268   // true.  Otherwise, we break the simplification logic in visitREM().
3269   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3270   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3271     if (SDValue DivRem = useDivRem(N))
3272         return DivRem;
3273
3274   return SDValue();
3275 }
3276
3277 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3278   SDLoc DL(N);
3279   EVT VT = N->getValueType(0);
3280   EVT CCVT = getSetCCResultType(VT);
3281   unsigned BitWidth = VT.getScalarSizeInBits();
3282
3283   // Helper for determining whether a value is a power-2 constant scalar or a
3284   // vector of such elements.
3285   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3286     if (C->isNullValue() || C->isOpaque())
3287       return false;
3288     if (C->getAPIntValue().isPowerOf2())
3289       return true;
3290     if ((-C->getAPIntValue()).isPowerOf2())
3291       return true;
3292     return false;
3293   };
3294
3295   // fold (sdiv X, pow2) -> simple ops after legalize
3296   // FIXME: We check for the exact bit here because the generic lowering gives
3297   // better results in that case. The target-specific lowering should learn how
3298   // to handle exact sdivs efficiently.
3299   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3300     // Target-specific implementation of sdiv x, pow2.
3301     if (SDValue Res = BuildSDIVPow2(N))
3302       return Res;
3303
3304     // Create constants that are functions of the shift amount value.
3305     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3306     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3307     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3308     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3309     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3310     if (!isConstantOrConstantVector(Inexact))
3311       return SDValue();
3312
3313     // Splat the sign bit into the register
3314     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3315                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3316     AddToWorklist(Sign.getNode());
3317
3318     // Add (N0 < 0) ? abs2 - 1 : 0;
3319     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3320     AddToWorklist(Srl.getNode());
3321     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3322     AddToWorklist(Add.getNode());
3323     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3324     AddToWorklist(Sra.getNode());
3325
3326     // Special case: (sdiv X, 1) -> X
3327     // Special Case: (sdiv X, -1) -> 0-X
3328     SDValue One = DAG.getConstant(1, DL, VT);
3329     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3330     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3331     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3332     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3333     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3334
3335     // If dividing by a positive value, we're done. Otherwise, the result must
3336     // be negated.
3337     SDValue Zero = DAG.getConstant(0, DL, VT);
3338     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3339
3340     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3341     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3342     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3343     return Res;
3344   }
3345
3346   // If integer divide is expensive and we satisfy the requirements, emit an
3347   // alternate sequence.  Targets may check function attributes for size/speed
3348   // trade-offs.
3349   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3350   if (isConstantOrConstantVector(N1) &&
3351       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3352     if (SDValue Op = BuildSDIV(N))
3353       return Op;
3354
3355   return SDValue();
3356 }
3357
3358 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3359   SDValue N0 = N->getOperand(0);
3360   SDValue N1 = N->getOperand(1);
3361   EVT VT = N->getValueType(0);
3362   EVT CCVT = getSetCCResultType(VT);
3363
3364   // fold vector ops
3365   if (VT.isVector())
3366     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3367       return FoldedVOp;
3368
3369   SDLoc DL(N);
3370
3371   // fold (udiv c1, c2) -> c1/c2
3372   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3373   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3374   if (N0C && N1C)
3375     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3376                                                     N0C, N1C))
3377       return Folded;
3378   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3379   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3380     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3381                          DAG.getConstant(1, DL, VT),
3382                          DAG.getConstant(0, DL, VT));
3383
3384   if (SDValue V = simplifyDivRem(N, DAG))
3385     return V;
3386
3387   if (SDValue NewSel = foldBinOpIntoSelect(N))
3388     return NewSel;
3389
3390   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3391     // If the corresponding remainder node exists, update its users with
3392     // (Dividend - (Quotient * Divisor).
3393     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3394                                               { N0, N1 })) {
3395       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3396       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3397       AddToWorklist(Mul.getNode());
3398       AddToWorklist(Sub.getNode());
3399       CombineTo(RemNode, Sub);
3400     }
3401     return V;
3402   }
3403
3404   // sdiv, srem -> sdivrem
3405   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3406   // true.  Otherwise, we break the simplification logic in visitREM().
3407   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3408   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3409     if (SDValue DivRem = useDivRem(N))
3410         return DivRem;
3411
3412   return SDValue();
3413 }
3414
3415 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3416   SDLoc DL(N);
3417   EVT VT = N->getValueType(0);
3418
3419   // fold (udiv x, (1 << c)) -> x >>u c
3420   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3421       DAG.isKnownToBeAPowerOfTwo(N1)) {
3422     SDValue LogBase2 = BuildLogBase2(N1, DL);
3423     AddToWorklist(LogBase2.getNode());
3424
3425     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3426     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3427     AddToWorklist(Trunc.getNode());
3428     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3429   }
3430
3431   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3432   if (N1.getOpcode() == ISD::SHL) {
3433     SDValue N10 = N1.getOperand(0);
3434     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3435         DAG.isKnownToBeAPowerOfTwo(N10)) {
3436       SDValue LogBase2 = BuildLogBase2(N10, DL);
3437       AddToWorklist(LogBase2.getNode());
3438
3439       EVT ADDVT = N1.getOperand(1).getValueType();
3440       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3441       AddToWorklist(Trunc.getNode());
3442       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3443       AddToWorklist(Add.getNode());
3444       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3445     }
3446   }
3447
3448   // fold (udiv x, c) -> alternate
3449   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3450   if (isConstantOrConstantVector(N1) &&
3451       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3452     if (SDValue Op = BuildUDIV(N))
3453       return Op;
3454
3455   return SDValue();
3456 }
3457
3458 // handles ISD::SREM and ISD::UREM
3459 SDValue DAGCombiner::visitREM(SDNode *N) {
3460   unsigned Opcode = N->getOpcode();
3461   SDValue N0 = N->getOperand(0);
3462   SDValue N1 = N->getOperand(1);
3463   EVT VT = N->getValueType(0);
3464   EVT CCVT = getSetCCResultType(VT);
3465
3466   bool isSigned = (Opcode == ISD::SREM);
3467   SDLoc DL(N);
3468
3469   // fold (rem c1, c2) -> c1%c2
3470   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3471   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3472   if (N0C && N1C)
3473     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3474       return Folded;
3475   // fold (urem X, -1) -> select(X == -1, 0, x)
3476   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3477     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3478                          DAG.getConstant(0, DL, VT), N0);
3479
3480   if (SDValue V = simplifyDivRem(N, DAG))
3481     return V;
3482
3483   if (SDValue NewSel = foldBinOpIntoSelect(N))
3484     return NewSel;
3485
3486   if (isSigned) {
3487     // If we know the sign bits of both operands are zero, strength reduce to a
3488     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3489     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3490       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3491   } else {
3492     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3493     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3494       // fold (urem x, pow2) -> (and x, pow2-1)
3495       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3496       AddToWorklist(Add.getNode());
3497       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3498     }
3499     if (N1.getOpcode() == ISD::SHL &&
3500         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3501       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3502       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3503       AddToWorklist(Add.getNode());
3504       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3505     }
3506   }
3507
3508   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3509
3510   // If X/C can be simplified by the division-by-constant logic, lower
3511   // X%C to the equivalent of X-X/C*C.
3512   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3513   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3514   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3515   // combine will not return a DIVREM.  Regardless, checking cheapness here
3516   // makes sense since the simplification results in fatter code.
3517   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3518     SDValue OptimizedDiv =
3519         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3520     if (OptimizedDiv.getNode()) {
3521       // If the equivalent Div node also exists, update its users.
3522       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3523       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3524                                                 { N0, N1 }))
3525         CombineTo(DivNode, OptimizedDiv);
3526       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3527       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3528       AddToWorklist(OptimizedDiv.getNode());
3529       AddToWorklist(Mul.getNode());
3530       return Sub;
3531     }
3532   }
3533
3534   // sdiv, srem -> sdivrem
3535   if (SDValue DivRem = useDivRem(N))
3536     return DivRem.getValue(1);
3537
3538   return SDValue();
3539 }
3540
3541 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3542   SDValue N0 = N->getOperand(0);
3543   SDValue N1 = N->getOperand(1);
3544   EVT VT = N->getValueType(0);
3545   SDLoc DL(N);
3546
3547   if (VT.isVector()) {
3548     // fold (mulhs x, 0) -> 0
3549     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3550       return N1;
3551     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3552       return N0;
3553   }
3554
3555   // fold (mulhs x, 0) -> 0
3556   if (isNullConstant(N1))
3557     return N1;
3558   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3559   if (isOneConstant(N1))
3560     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3561                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3562                                        getShiftAmountTy(N0.getValueType())));
3563
3564   // fold (mulhs x, undef) -> 0
3565   if (N0.isUndef() || N1.isUndef())
3566     return DAG.getConstant(0, DL, VT);
3567
3568   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3569   // plus a shift.
3570   if (VT.isSimple() && !VT.isVector()) {
3571     MVT Simple = VT.getSimpleVT();
3572     unsigned SimpleSize = Simple.getSizeInBits();
3573     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3574     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3575       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3576       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3577       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3578       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3579             DAG.getConstant(SimpleSize, DL,
3580                             getShiftAmountTy(N1.getValueType())));
3581       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3582     }
3583   }
3584
3585   return SDValue();
3586 }
3587
3588 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3589   SDValue N0 = N->getOperand(0);
3590   SDValue N1 = N->getOperand(1);
3591   EVT VT = N->getValueType(0);
3592   SDLoc DL(N);
3593
3594   if (VT.isVector()) {
3595     // fold (mulhu x, 0) -> 0
3596     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3597       return N1;
3598     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3599       return N0;
3600   }
3601
3602   // fold (mulhu x, 0) -> 0
3603   if (isNullConstant(N1))
3604     return N1;
3605   // fold (mulhu x, 1) -> 0
3606   if (isOneConstant(N1))
3607     return DAG.getConstant(0, DL, N0.getValueType());
3608   // fold (mulhu x, undef) -> 0
3609   if (N0.isUndef() || N1.isUndef())
3610     return DAG.getConstant(0, DL, VT);
3611
3612   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3613   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3614       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3615     SDLoc DL(N);
3616     unsigned NumEltBits = VT.getScalarSizeInBits();
3617     SDValue LogBase2 = BuildLogBase2(N1, DL);
3618     SDValue SRLAmt = DAG.getNode(
3619         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3620     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3621     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3622     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3623   }
3624
3625   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3626   // plus a shift.
3627   if (VT.isSimple() && !VT.isVector()) {
3628     MVT Simple = VT.getSimpleVT();
3629     unsigned SimpleSize = Simple.getSizeInBits();
3630     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3631     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3632       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3633       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3634       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3635       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3636             DAG.getConstant(SimpleSize, DL,
3637                             getShiftAmountTy(N1.getValueType())));
3638       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3639     }
3640   }
3641
3642   return SDValue();
3643 }
3644
3645 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3646 /// give the opcodes for the two computations that are being performed. Return
3647 /// true if a simplification was made.
3648 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3649                                                 unsigned HiOp) {
3650   // If the high half is not needed, just compute the low half.
3651   bool HiExists = N->hasAnyUseOfValue(1);
3652   if (!HiExists && (!LegalOperations ||
3653                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3654     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3655     return CombineTo(N, Res, Res);
3656   }
3657
3658   // If the low half is not needed, just compute the high half.
3659   bool LoExists = N->hasAnyUseOfValue(0);
3660   if (!LoExists && (!LegalOperations ||
3661                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3662     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3663     return CombineTo(N, Res, Res);
3664   }
3665
3666   // If both halves are used, return as it is.
3667   if (LoExists && HiExists)
3668     return SDValue();
3669
3670   // If the two computed results can be simplified separately, separate them.
3671   if (LoExists) {
3672     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3673     AddToWorklist(Lo.getNode());
3674     SDValue LoOpt = combine(Lo.getNode());
3675     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3676         (!LegalOperations ||
3677          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3678       return CombineTo(N, LoOpt, LoOpt);
3679   }
3680
3681   if (HiExists) {
3682     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3683     AddToWorklist(Hi.getNode());
3684     SDValue HiOpt = combine(Hi.getNode());
3685     if (HiOpt.getNode() && HiOpt != Hi &&
3686         (!LegalOperations ||
3687          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3688       return CombineTo(N, HiOpt, HiOpt);
3689   }
3690
3691   return SDValue();
3692 }
3693
3694 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3695   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3696     return Res;
3697
3698   EVT VT = N->getValueType(0);
3699   SDLoc DL(N);
3700
3701   // If the type is twice as wide is legal, transform the mulhu to a wider
3702   // multiply plus a shift.
3703   if (VT.isSimple() && !VT.isVector()) {
3704     MVT Simple = VT.getSimpleVT();
3705     unsigned SimpleSize = Simple.getSizeInBits();
3706     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3707     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3708       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3709       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3710       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3711       // Compute the high part as N1.
3712       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3713             DAG.getConstant(SimpleSize, DL,
3714                             getShiftAmountTy(Lo.getValueType())));
3715       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3716       // Compute the low part as N0.
3717       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3718       return CombineTo(N, Lo, Hi);
3719     }
3720   }
3721
3722   return SDValue();
3723 }
3724
3725 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3726   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3727     return Res;
3728
3729   EVT VT = N->getValueType(0);
3730   SDLoc DL(N);
3731
3732   // If the type is twice as wide is legal, transform the mulhu to a wider
3733   // multiply plus a shift.
3734   if (VT.isSimple() && !VT.isVector()) {
3735     MVT Simple = VT.getSimpleVT();
3736     unsigned SimpleSize = Simple.getSizeInBits();
3737     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3738     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3739       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3740       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3741       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3742       // Compute the high part as N1.
3743       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3744             DAG.getConstant(SimpleSize, DL,
3745                             getShiftAmountTy(Lo.getValueType())));
3746       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3747       // Compute the low part as N0.
3748       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3749       return CombineTo(N, Lo, Hi);
3750     }
3751   }
3752
3753   return SDValue();
3754 }
3755
3756 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3757   // (smulo x, 2) -> (saddo x, x)
3758   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3759     if (C2->getAPIntValue() == 2)
3760       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3761                          N->getOperand(0), N->getOperand(0));
3762
3763   return SDValue();
3764 }
3765
3766 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3767   // (umulo x, 2) -> (uaddo x, x)
3768   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3769     if (C2->getAPIntValue() == 2)
3770       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3771                          N->getOperand(0), N->getOperand(0));
3772
3773   return SDValue();
3774 }
3775
3776 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3777   SDValue N0 = N->getOperand(0);
3778   SDValue N1 = N->getOperand(1);
3779   EVT VT = N0.getValueType();
3780
3781   // fold vector ops
3782   if (VT.isVector())
3783     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3784       return FoldedVOp;
3785
3786   // fold operation with constant operands.
3787   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3788   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3789   if (N0C && N1C)
3790     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3791
3792   // canonicalize constant to RHS
3793   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3794      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3795     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3796
3797   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3798   // Only do this if the current op isn't legal and the flipped is.
3799   unsigned Opcode = N->getOpcode();
3800   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3801   if (!TLI.isOperationLegal(Opcode, VT) &&
3802       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3803       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3804     unsigned AltOpcode;
3805     switch (Opcode) {
3806     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3807     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3808     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3809     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3810     default: llvm_unreachable("Unknown MINMAX opcode");
3811     }
3812     if (TLI.isOperationLegal(AltOpcode, VT))
3813       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3814   }
3815
3816   return SDValue();
3817 }
3818
3819 /// If this is a bitwise logic instruction and both operands have the same
3820 /// opcode, try to sink the other opcode after the logic instruction.
3821 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
3822   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3823   EVT VT = N0.getValueType();
3824   unsigned LogicOpcode = N->getOpcode();
3825   unsigned HandOpcode = N0.getOpcode();
3826   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
3827           LogicOpcode == ISD::XOR) && "Expected logic opcode");
3828   assert(HandOpcode == N1.getOpcode() && "Bad input!");
3829
3830   // Bail early if none of these transforms apply.
3831   if (N0.getNumOperands() == 0)
3832     return SDValue();
3833
3834   // FIXME: We should check number of uses of the operands to not increase
3835   //        the instruction count for all transforms.
3836
3837   // Handle size-changing casts.
3838   SDValue X = N0.getOperand(0);
3839   SDValue Y = N1.getOperand(0);
3840   EVT XVT = X.getValueType();
3841   SDLoc DL(N);
3842   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
3843       HandOpcode == ISD::SIGN_EXTEND) {
3844     // If both operands have other uses, this transform would create extra
3845     // instructions without eliminating anything.
3846     if (!N0.hasOneUse() && !N1.hasOneUse())
3847       return SDValue();
3848     // We need matching integer source types.
3849     if (XVT != Y.getValueType())
3850       return SDValue();
3851     // Don't create an illegal op during or after legalization. Don't ever
3852     // create an unsupported vector op.
3853     if ((VT.isVector() || LegalOperations) &&
3854         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
3855       return SDValue();
3856     // Avoid infinite looping with PromoteIntBinOp.
3857     // TODO: Should we apply desirable/legal constraints to all opcodes?
3858     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
3859         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
3860       return SDValue();
3861     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
3862     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3863     return DAG.getNode(HandOpcode, DL, VT, Logic);
3864   }
3865
3866   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
3867   if (HandOpcode == ISD::TRUNCATE) {
3868     // If both operands have other uses, this transform would create extra
3869     // instructions without eliminating anything.
3870     if (!N0.hasOneUse() && !N1.hasOneUse())
3871       return SDValue();
3872     // We need matching source types.
3873     if (XVT != Y.getValueType())
3874       return SDValue();
3875     // Don't create an illegal op during or after legalization.
3876     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3877       return SDValue();
3878     // Be extra careful sinking truncate. If it's free, there's no benefit in
3879     // widening a binop. Also, don't create a logic op on an illegal type.
3880     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
3881       return SDValue();
3882     if (!TLI.isTypeLegal(XVT))
3883       return SDValue();
3884     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3885     return DAG.getNode(HandOpcode, DL, VT, Logic);
3886   }
3887
3888   // For binops SHL/SRL/SRA/AND:
3889   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
3890   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
3891        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
3892       N0.getOperand(1) == N1.getOperand(1)) {
3893     // If either operand has other uses, this transform is not an improvement.
3894     if (!N0.hasOneUse() || !N1.hasOneUse())
3895       return SDValue();
3896     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3897     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
3898   }
3899
3900   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
3901   if (HandOpcode == ISD::BSWAP) {
3902     // If either operand has other uses, this transform is not an improvement.
3903     if (!N0.hasOneUse() || !N1.hasOneUse())
3904       return SDValue();
3905     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3906     return DAG.getNode(HandOpcode, DL, VT, Logic);
3907   }
3908
3909   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3910   // Only perform this optimization up until type legalization, before
3911   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3912   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3913   // we don't want to undo this promotion.
3914   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3915   // on scalars.
3916   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
3917        Level <= AfterLegalizeTypes) {
3918     // Input types must be integer and the same.
3919     if (XVT.isInteger() && XVT == Y.getValueType()) {
3920       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3921       return DAG.getNode(HandOpcode, DL, VT, Logic);
3922     }
3923   }
3924
3925   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3926   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3927   // If both shuffles use the same mask, and both shuffle within a single
3928   // vector, then it is worthwhile to move the swizzle after the operation.
3929   // The type-legalizer generates this pattern when loading illegal
3930   // vector types from memory. In many cases this allows additional shuffle
3931   // optimizations.
3932   // There are other cases where moving the shuffle after the xor/and/or
3933   // is profitable even if shuffles don't perform a swizzle.
3934   // If both shuffles use the same mask, and both shuffles have the same first
3935   // or second operand, then it might still be profitable to move the shuffle
3936   // after the xor/and/or operation.
3937   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3938     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
3939     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
3940     assert(X.getValueType() == Y.getValueType() &&
3941            "Inputs to shuffles are not the same type");
3942
3943     // Check that both shuffles use the same mask. The masks are known to be of
3944     // the same length because the result vector type is the same.
3945     // Check also that shuffles have only one use to avoid introducing extra
3946     // instructions.
3947     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
3948         !SVN0->getMask().equals(SVN1->getMask()))
3949       return SDValue();
3950
3951     // Don't try to fold this node if it requires introducing a
3952     // build vector of all zeros that might be illegal at this stage.
3953     SDValue ShOp = N0.getOperand(1);
3954     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3955       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3956
3957     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
3958     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3959       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
3960                                   N0.getOperand(0), N1.getOperand(0));
3961       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
3962     }
3963
3964     // Don't try to fold this node if it requires introducing a
3965     // build vector of all zeros that might be illegal at this stage.
3966     ShOp = N0.getOperand(0);
3967     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3968       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3969
3970     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
3971     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
3972       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
3973                                   N1.getOperand(1));
3974       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
3975     }
3976   }
3977
3978   return SDValue();
3979 }
3980
3981 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3982 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3983                                        const SDLoc &DL) {
3984   SDValue LL, LR, RL, RR, N0CC, N1CC;
3985   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3986       !isSetCCEquivalent(N1, RL, RR, N1CC))
3987     return SDValue();
3988
3989   assert(N0.getValueType() == N1.getValueType() &&
3990          "Unexpected operand types for bitwise logic op");
3991   assert(LL.getValueType() == LR.getValueType() &&
3992          RL.getValueType() == RR.getValueType() &&
3993          "Unexpected operand types for setcc");
3994
3995   // If we're here post-legalization or the logic op type is not i1, the logic
3996   // op type must match a setcc result type. Also, all folds require new
3997   // operations on the left and right operands, so those types must match.
3998   EVT VT = N0.getValueType();
3999   EVT OpVT = LL.getValueType();
4000   if (LegalOperations || VT.getScalarType() != MVT::i1)
4001     if (VT != getSetCCResultType(OpVT))
4002       return SDValue();
4003   if (OpVT != RL.getValueType())
4004     return SDValue();
4005
4006   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4007   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4008   bool IsInteger = OpVT.isInteger();
4009   if (LR == RR && CC0 == CC1 && IsInteger) {
4010     bool IsZero = isNullOrNullSplat(LR);
4011     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4012
4013     // All bits clear?
4014     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4015     // All sign bits clear?
4016     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4017     // Any bits set?
4018     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4019     // Any sign bits set?
4020     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4021
4022     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4023     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4024     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4025     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4026     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4027       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4028       AddToWorklist(Or.getNode());
4029       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4030     }
4031
4032     // All bits set?
4033     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4034     // All sign bits set?
4035     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4036     // Any bits clear?
4037     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4038     // Any sign bits clear?
4039     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4040
4041     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4042     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4043     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4044     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4045     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4046       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4047       AddToWorklist(And.getNode());
4048       return DAG.getSetCC(DL, VT, And, LR, CC1);
4049     }
4050   }
4051
4052   // TODO: What is the 'or' equivalent of this fold?
4053   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4054   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4055       IsInteger && CC0 == ISD::SETNE &&
4056       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4057        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4058     SDValue One = DAG.getConstant(1, DL, OpVT);
4059     SDValue Two = DAG.getConstant(2, DL, OpVT);
4060     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4061     AddToWorklist(Add.getNode());
4062     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4063   }
4064
4065   // Try more general transforms if the predicates match and the only user of
4066   // the compares is the 'and' or 'or'.
4067   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4068       N0.hasOneUse() && N1.hasOneUse()) {
4069     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4070     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4071     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4072       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4073       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4074       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4075       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4076       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4077     }
4078   }
4079
4080   // Canonicalize equivalent operands to LL == RL.
4081   if (LL == RR && LR == RL) {
4082     CC1 = ISD::getSetCCSwappedOperands(CC1);
4083     std::swap(RL, RR);
4084   }
4085
4086   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4087   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4088   if (LL == RL && LR == RR) {
4089     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4090                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4091     if (NewCC != ISD::SETCC_INVALID &&
4092         (!LegalOperations ||
4093          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4094           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4095       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4096   }
4097
4098   return SDValue();
4099 }
4100
4101 /// This contains all DAGCombine rules which reduce two values combined by
4102 /// an And operation to a single value. This makes them reusable in the context
4103 /// of visitSELECT(). Rules involving constants are not included as
4104 /// visitSELECT() already handles those cases.
4105 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4106   EVT VT = N1.getValueType();
4107   SDLoc DL(N);
4108
4109   // fold (and x, undef) -> 0
4110   if (N0.isUndef() || N1.isUndef())
4111     return DAG.getConstant(0, DL, VT);
4112
4113   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4114     return V;
4115
4116   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4117       VT.getSizeInBits() <= 64) {
4118     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4119       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4120         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4121         // immediate for an add, but it is legal if its top c2 bits are set,
4122         // transform the ADD so the immediate doesn't need to be materialized
4123         // in a register.
4124         APInt ADDC = ADDI->getAPIntValue();
4125         APInt SRLC = SRLI->getAPIntValue();
4126         if (ADDC.getMinSignedBits() <= 64 &&
4127             SRLC.ult(VT.getSizeInBits()) &&
4128             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4129           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4130                                              SRLC.getZExtValue());
4131           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4132             ADDC |= Mask;
4133             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4134               SDLoc DL0(N0);
4135               SDValue NewAdd =
4136                 DAG.getNode(ISD::ADD, DL0, VT,
4137                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4138               CombineTo(N0.getNode(), NewAdd);
4139               // Return N so it doesn't get rechecked!
4140               return SDValue(N, 0);
4141             }
4142           }
4143         }
4144       }
4145     }
4146   }
4147
4148   // Reduce bit extract of low half of an integer to the narrower type.
4149   // (and (srl i64:x, K), KMask) ->
4150   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4151   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4152     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4153       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4154         unsigned Size = VT.getSizeInBits();
4155         const APInt &AndMask = CAnd->getAPIntValue();
4156         unsigned ShiftBits = CShift->getZExtValue();
4157
4158         // Bail out, this node will probably disappear anyway.
4159         if (ShiftBits == 0)
4160           return SDValue();
4161
4162         unsigned MaskBits = AndMask.countTrailingOnes();
4163         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4164
4165         if (AndMask.isMask() &&
4166             // Required bits must not span the two halves of the integer and
4167             // must fit in the half size type.
4168             (ShiftBits + MaskBits <= Size / 2) &&
4169             TLI.isNarrowingProfitable(VT, HalfVT) &&
4170             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4171             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4172             TLI.isTruncateFree(VT, HalfVT) &&
4173             TLI.isZExtFree(HalfVT, VT)) {
4174           // The isNarrowingProfitable is to avoid regressions on PPC and
4175           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4176           // on downstream users of this. Those patterns could probably be
4177           // extended to handle extensions mixed in.
4178
4179           SDValue SL(N0);
4180           assert(MaskBits <= Size);
4181
4182           // Extracting the highest bit of the low half.
4183           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4184           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4185                                       N0.getOperand(0));
4186
4187           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4188           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4189           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4190           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4191           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4192         }
4193       }
4194     }
4195   }
4196
4197   return SDValue();
4198 }
4199
4200 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4201                                    EVT LoadResultTy, EVT &ExtVT) {
4202   if (!AndC->getAPIntValue().isMask())
4203     return false;
4204
4205   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4206
4207   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4208   EVT LoadedVT = LoadN->getMemoryVT();
4209
4210   if (ExtVT == LoadedVT &&
4211       (!LegalOperations ||
4212        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4213     // ZEXTLOAD will match without needing to change the size of the value being
4214     // loaded.
4215     return true;
4216   }
4217
4218   // Do not change the width of a volatile load.
4219   if (LoadN->isVolatile())
4220     return false;
4221
4222   // Do not generate loads of non-round integer types since these can
4223   // be expensive (and would be wrong if the type is not byte sized).
4224   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4225     return false;
4226
4227   if (LegalOperations &&
4228       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4229     return false;
4230
4231   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4232     return false;
4233
4234   return true;
4235 }
4236
4237 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4238                                     ISD::LoadExtType ExtType, EVT &MemVT,
4239                                     unsigned ShAmt) {
4240   if (!LDST)
4241     return false;
4242   // Only allow byte offsets.
4243   if (ShAmt % 8)
4244     return false;
4245
4246   // Do not generate loads of non-round integer types since these can
4247   // be expensive (and would be wrong if the type is not byte sized).
4248   if (!MemVT.isRound())
4249     return false;
4250
4251   // Don't change the width of a volatile load.
4252   if (LDST->isVolatile())
4253     return false;
4254
4255   // Verify that we are actually reducing a load width here.
4256   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4257     return false;
4258
4259   // Ensure that this isn't going to produce an unsupported unaligned access.
4260   if (ShAmt &&
4261       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4262                               LDST->getAddressSpace(), ShAmt / 8))
4263     return false;
4264
4265   // It's not possible to generate a constant of extended or untyped type.
4266   EVT PtrType = LDST->getBasePtr().getValueType();
4267   if (PtrType == MVT::Untyped || PtrType.isExtended())
4268     return false;
4269
4270   if (isa<LoadSDNode>(LDST)) {
4271     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4272     // Don't transform one with multiple uses, this would require adding a new
4273     // load.
4274     if (!SDValue(Load, 0).hasOneUse())
4275       return false;
4276
4277     if (LegalOperations &&
4278         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4279       return false;
4280
4281     // For the transform to be legal, the load must produce only two values
4282     // (the value loaded and the chain).  Don't transform a pre-increment
4283     // load, for example, which produces an extra value.  Otherwise the
4284     // transformation is not equivalent, and the downstream logic to replace
4285     // uses gets things wrong.
4286     if (Load->getNumValues() > 2)
4287       return false;
4288
4289     // If the load that we're shrinking is an extload and we're not just
4290     // discarding the extension we can't simply shrink the load. Bail.
4291     // TODO: It would be possible to merge the extensions in some cases.
4292     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4293         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4294       return false;
4295
4296     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4297       return false;
4298   } else {
4299     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4300     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4301     // Can't write outside the original store
4302     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4303       return false;
4304
4305     if (LegalOperations &&
4306         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4307       return false;
4308   }
4309   return true;
4310 }
4311
4312 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4313                                     SmallVectorImpl<LoadSDNode*> &Loads,
4314                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4315                                     ConstantSDNode *Mask,
4316                                     SDNode *&NodeToMask) {
4317   // Recursively search for the operands, looking for loads which can be
4318   // narrowed.
4319   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4320     SDValue Op = N->getOperand(i);
4321
4322     if (Op.getValueType().isVector())
4323       return false;
4324
4325     // Some constants may need fixing up later if they are too large.
4326     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4327       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4328           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4329         NodesWithConsts.insert(N);
4330       continue;
4331     }
4332
4333     if (!Op.hasOneUse())
4334       return false;
4335
4336     switch(Op.getOpcode()) {
4337     case ISD::LOAD: {
4338       auto *Load = cast<LoadSDNode>(Op);
4339       EVT ExtVT;
4340       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4341           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4342
4343         // ZEXTLOAD is already small enough.
4344         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4345             ExtVT.bitsGE(Load->getMemoryVT()))
4346           continue;
4347
4348         // Use LE to convert equal sized loads to zext.
4349         if (ExtVT.bitsLE(Load->getMemoryVT()))
4350           Loads.push_back(Load);
4351
4352         continue;
4353       }
4354       return false;
4355     }
4356     case ISD::ZERO_EXTEND:
4357     case ISD::AssertZext: {
4358       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4359       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4360       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4361         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4362         Op.getOperand(0).getValueType();
4363
4364       // We can accept extending nodes if the mask is wider or an equal
4365       // width to the original type.
4366       if (ExtVT.bitsGE(VT))
4367         continue;
4368       break;
4369     }
4370     case ISD::OR:
4371     case ISD::XOR:
4372     case ISD::AND:
4373       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4374                              NodeToMask))
4375         return false;
4376       continue;
4377     }
4378
4379     // Allow one node which will masked along with any loads found.
4380     if (NodeToMask)
4381       return false;
4382
4383     // Also ensure that the node to be masked only produces one data result.
4384     NodeToMask = Op.getNode();
4385     if (NodeToMask->getNumValues() > 1) {
4386       bool HasValue = false;
4387       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4388         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4389         if (VT != MVT::Glue && VT != MVT::Other) {
4390           if (HasValue) {
4391             NodeToMask = nullptr;
4392             return false;
4393           }
4394           HasValue = true;
4395         }
4396       }
4397       assert(HasValue && "Node to be masked has no data result?");
4398     }
4399   }
4400   return true;
4401 }
4402
4403 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4404   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4405   if (!Mask)
4406     return false;
4407
4408   if (!Mask->getAPIntValue().isMask())
4409     return false;
4410
4411   // No need to do anything if the and directly uses a load.
4412   if (isa<LoadSDNode>(N->getOperand(0)))
4413     return false;
4414
4415   SmallVector<LoadSDNode*, 8> Loads;
4416   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4417   SDNode *FixupNode = nullptr;
4418   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4419     if (Loads.size() == 0)
4420       return false;
4421
4422     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4423     SDValue MaskOp = N->getOperand(1);
4424
4425     // If it exists, fixup the single node we allow in the tree that needs
4426     // masking.
4427     if (FixupNode) {
4428       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4429       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4430                                 FixupNode->getValueType(0),
4431                                 SDValue(FixupNode, 0), MaskOp);
4432       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4433       if (And.getOpcode() == ISD ::AND)
4434         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4435     }
4436
4437     // Narrow any constants that need it.
4438     for (auto *LogicN : NodesWithConsts) {
4439       SDValue Op0 = LogicN->getOperand(0);
4440       SDValue Op1 = LogicN->getOperand(1);
4441
4442       if (isa<ConstantSDNode>(Op0))
4443           std::swap(Op0, Op1);
4444
4445       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4446                                 Op1, MaskOp);
4447
4448       DAG.UpdateNodeOperands(LogicN, Op0, And);
4449     }
4450
4451     // Create narrow loads.
4452     for (auto *Load : Loads) {
4453       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4454       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4455                                 SDValue(Load, 0), MaskOp);
4456       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4457       if (And.getOpcode() == ISD ::AND)
4458         And = SDValue(
4459             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4460       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4461       assert(NewLoad &&
4462              "Shouldn't be masking the load if it can't be narrowed");
4463       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4464     }
4465     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4466     return true;
4467   }
4468   return false;
4469 }
4470
4471 // Unfold
4472 //    x &  (-1 'logical shift' y)
4473 // To
4474 //    (x 'opposite logical shift' y) 'logical shift' y
4475 // if it is better for performance.
4476 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4477   assert(N->getOpcode() == ISD::AND);
4478
4479   SDValue N0 = N->getOperand(0);
4480   SDValue N1 = N->getOperand(1);
4481
4482   // Do we actually prefer shifts over mask?
4483   if (!TLI.preferShiftsToClearExtremeBits(N0))
4484     return SDValue();
4485
4486   // Try to match  (-1 '[outer] logical shift' y)
4487   unsigned OuterShift;
4488   unsigned InnerShift; // The opposite direction to the OuterShift.
4489   SDValue Y;           // Shift amount.
4490   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4491     if (!M.hasOneUse())
4492       return false;
4493     OuterShift = M->getOpcode();
4494     if (OuterShift == ISD::SHL)
4495       InnerShift = ISD::SRL;
4496     else if (OuterShift == ISD::SRL)
4497       InnerShift = ISD::SHL;
4498     else
4499       return false;
4500     if (!isAllOnesConstant(M->getOperand(0)))
4501       return false;
4502     Y = M->getOperand(1);
4503     return true;
4504   };
4505
4506   SDValue X;
4507   if (matchMask(N1))
4508     X = N0;
4509   else if (matchMask(N0))
4510     X = N1;
4511   else
4512     return SDValue();
4513
4514   SDLoc DL(N);
4515   EVT VT = N->getValueType(0);
4516
4517   //     tmp = x   'opposite logical shift' y
4518   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4519   //     ret = tmp 'logical shift' y
4520   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4521
4522   return T1;
4523 }
4524
4525 SDValue DAGCombiner::visitAND(SDNode *N) {
4526   SDValue N0 = N->getOperand(0);
4527   SDValue N1 = N->getOperand(1);
4528   EVT VT = N1.getValueType();
4529
4530   // x & x --> x
4531   if (N0 == N1)
4532     return N0;
4533
4534   // fold vector ops
4535   if (VT.isVector()) {
4536     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4537       return FoldedVOp;
4538
4539     // fold (and x, 0) -> 0, vector edition
4540     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4541       // do not return N0, because undef node may exist in N0
4542       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4543                              SDLoc(N), N0.getValueType());
4544     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4545       // do not return N1, because undef node may exist in N1
4546       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4547                              SDLoc(N), N1.getValueType());
4548
4549     // fold (and x, -1) -> x, vector edition
4550     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4551       return N1;
4552     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4553       return N0;
4554   }
4555
4556   // fold (and c1, c2) -> c1&c2
4557   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4558   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4559   if (N0C && N1C && !N1C->isOpaque())
4560     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4561   // canonicalize constant to RHS
4562   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4563       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4564     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4565   // fold (and x, -1) -> x
4566   if (isAllOnesConstant(N1))
4567     return N0;
4568   // if (and x, c) is known to be zero, return 0
4569   unsigned BitWidth = VT.getScalarSizeInBits();
4570   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4571                                    APInt::getAllOnesValue(BitWidth)))
4572     return DAG.getConstant(0, SDLoc(N), VT);
4573
4574   if (SDValue NewSel = foldBinOpIntoSelect(N))
4575     return NewSel;
4576
4577   // reassociate and
4578   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4579     return RAND;
4580
4581   // Try to convert a constant mask AND into a shuffle clear mask.
4582   if (VT.isVector())
4583     if (SDValue Shuffle = XformToShuffleWithZero(N))
4584       return Shuffle;
4585
4586   // fold (and (or x, C), D) -> D if (C & D) == D
4587   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4588     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4589   };
4590   if (N0.getOpcode() == ISD::OR &&
4591       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4592     return N1;
4593   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4594   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4595     SDValue N0Op0 = N0.getOperand(0);
4596     APInt Mask = ~N1C->getAPIntValue();
4597     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4598     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4599       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4600                                  N0.getValueType(), N0Op0);
4601
4602       // Replace uses of the AND with uses of the Zero extend node.
4603       CombineTo(N, Zext);
4604
4605       // We actually want to replace all uses of the any_extend with the
4606       // zero_extend, to avoid duplicating things.  This will later cause this
4607       // AND to be folded.
4608       CombineTo(N0.getNode(), Zext);
4609       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4610     }
4611   }
4612   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4613   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4614   // already be zero by virtue of the width of the base type of the load.
4615   //
4616   // the 'X' node here can either be nothing or an extract_vector_elt to catch
4617   // more cases.
4618   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4619        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4620        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4621        N0.getOperand(0).getResNo() == 0) ||
4622       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4623     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4624                                          N0 : N0.getOperand(0) );
4625
4626     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4627     // This can be a pure constant or a vector splat, in which case we treat the
4628     // vector as a scalar and use the splat value.
4629     APInt Constant = APInt::getNullValue(1);
4630     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4631       Constant = C->getAPIntValue();
4632     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4633       APInt SplatValue, SplatUndef;
4634       unsigned SplatBitSize;
4635       bool HasAnyUndefs;
4636       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4637                                              SplatBitSize, HasAnyUndefs);
4638       if (IsSplat) {
4639         // Undef bits can contribute to a possible optimisation if set, so
4640         // set them.
4641         SplatValue |= SplatUndef;
4642
4643         // The splat value may be something like "0x00FFFFFF", which means 0 for
4644         // the first vector value and FF for the rest, repeating. We need a mask
4645         // that will apply equally to all members of the vector, so AND all the
4646         // lanes of the constant together.
4647         EVT VT = Vector->getValueType(0);
4648         unsigned BitWidth = VT.getScalarSizeInBits();
4649
4650         // If the splat value has been compressed to a bitlength lower
4651         // than the size of the vector lane, we need to re-expand it to
4652         // the lane size.
4653         if (BitWidth > SplatBitSize)
4654           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4655                SplatBitSize < BitWidth;
4656                SplatBitSize = SplatBitSize * 2)
4657             SplatValue |= SplatValue.shl(SplatBitSize);
4658
4659         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4660         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4661         if (SplatBitSize % BitWidth == 0) {
4662           Constant = APInt::getAllOnesValue(BitWidth);
4663           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4664             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4665         }
4666       }
4667     }
4668
4669     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4670     // actually legal and isn't going to get expanded, else this is a false
4671     // optimisation.
4672     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4673                                                     Load->getValueType(0),
4674                                                     Load->getMemoryVT());
4675
4676     // Resize the constant to the same size as the original memory access before
4677     // extension. If it is still the AllOnesValue then this AND is completely
4678     // unneeded.
4679     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4680
4681     bool B;
4682     switch (Load->getExtensionType()) {
4683     default: B = false; break;
4684     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4685     case ISD::ZEXTLOAD:
4686     case ISD::NON_EXTLOAD: B = true; break;
4687     }
4688
4689     if (B && Constant.isAllOnesValue()) {
4690       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4691       // preserve semantics once we get rid of the AND.
4692       SDValue NewLoad(Load, 0);
4693
4694       // Fold the AND away. NewLoad may get replaced immediately.
4695       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4696
4697       if (Load->getExtensionType() == ISD::EXTLOAD) {
4698         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4699                               Load->getValueType(0), SDLoc(Load),
4700                               Load->getChain(), Load->getBasePtr(),
4701                               Load->getOffset(), Load->getMemoryVT(),
4702                               Load->getMemOperand());
4703         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4704         if (Load->getNumValues() == 3) {
4705           // PRE/POST_INC loads have 3 values.
4706           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4707                            NewLoad.getValue(2) };
4708           CombineTo(Load, To, 3, true);
4709         } else {
4710           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4711         }
4712       }
4713
4714       return SDValue(N, 0); // Return N so it doesn't get rechecked!
4715     }
4716   }
4717
4718   // fold (and (load x), 255) -> (zextload x, i8)
4719   // fold (and (extload x, i16), 255) -> (zextload x, i8)
4720   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4721   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4722                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
4723                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4724     if (SDValue Res = ReduceLoadWidth(N)) {
4725       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4726         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4727       AddToWorklist(N);
4728       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4729       return SDValue(N, 0);
4730     }
4731   }
4732
4733   if (Level >= AfterLegalizeTypes) {
4734     // Attempt to propagate the AND back up to the leaves which, if they're
4735     // loads, can be combined to narrow loads and the AND node can be removed.
4736     // Perform after legalization so that extend nodes will already be
4737     // combined into the loads.
4738     if (BackwardsPropagateMask(N, DAG)) {
4739       return SDValue(N, 0);
4740     }
4741   }
4742
4743   if (SDValue Combined = visitANDLike(N0, N1, N))
4744     return Combined;
4745
4746   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
4747   if (N0.getOpcode() == N1.getOpcode())
4748     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4749       return V;
4750
4751   // Masking the negated extension of a boolean is just the zero-extended
4752   // boolean:
4753   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4754   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4755   //
4756   // Note: the SimplifyDemandedBits fold below can make an information-losing
4757   // transform, and then we have no way to find this better fold.
4758   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4759     if (isNullOrNullSplat(N0.getOperand(0))) {
4760       SDValue SubRHS = N0.getOperand(1);
4761       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4762           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4763         return SubRHS;
4764       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4765           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4766         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4767     }
4768   }
4769
4770   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4771   // fold (and (sra)) -> (and (srl)) when possible.
4772   if (SimplifyDemandedBits(SDValue(N, 0)))
4773     return SDValue(N, 0);
4774
4775   // fold (zext_inreg (extload x)) -> (zextload x)
4776   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4777     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4778     EVT MemVT = LN0->getMemoryVT();
4779     // If we zero all the possible extended bits, then we can turn this into
4780     // a zextload if we are running before legalize or the operation is legal.
4781     unsigned BitWidth = N1.getScalarValueSizeInBits();
4782     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4783                            BitWidth - MemVT.getScalarSizeInBits())) &&
4784         ((!LegalOperations && !LN0->isVolatile()) ||
4785          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4786       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4787                                        LN0->getChain(), LN0->getBasePtr(),
4788                                        MemVT, LN0->getMemOperand());
4789       AddToWorklist(N);
4790       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4791       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4792     }
4793   }
4794   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4795   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4796       N0.hasOneUse()) {
4797     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4798     EVT MemVT = LN0->getMemoryVT();
4799     // If we zero all the possible extended bits, then we can turn this into
4800     // a zextload if we are running before legalize or the operation is legal.
4801     unsigned BitWidth = N1.getScalarValueSizeInBits();
4802     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4803                            BitWidth - MemVT.getScalarSizeInBits())) &&
4804         ((!LegalOperations && !LN0->isVolatile()) ||
4805          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4806       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4807                                        LN0->getChain(), LN0->getBasePtr(),
4808                                        MemVT, LN0->getMemOperand());
4809       AddToWorklist(N);
4810       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4811       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4812     }
4813   }
4814   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4815   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4816     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4817                                            N0.getOperand(1), false))
4818       return BSwap;
4819   }
4820
4821   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4822     return Shifts;
4823
4824   return SDValue();
4825 }
4826
4827 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4828 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4829                                         bool DemandHighBits) {
4830   if (!LegalOperations)
4831     return SDValue();
4832
4833   EVT VT = N->getValueType(0);
4834   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4835     return SDValue();
4836   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4837     return SDValue();
4838
4839   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4840   bool LookPassAnd0 = false;
4841   bool LookPassAnd1 = false;
4842   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4843       std::swap(N0, N1);
4844   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4845       std::swap(N0, N1);
4846   if (N0.getOpcode() == ISD::AND) {
4847     if (!N0.getNode()->hasOneUse())
4848       return SDValue();
4849     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4850     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4851     // This is needed for X86.
4852     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4853                   N01C->getZExtValue() != 0xFFFF))
4854       return SDValue();
4855     N0 = N0.getOperand(0);
4856     LookPassAnd0 = true;
4857   }
4858
4859   if (N1.getOpcode() == ISD::AND) {
4860     if (!N1.getNode()->hasOneUse())
4861       return SDValue();
4862     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4863     if (!N11C || N11C->getZExtValue() != 0xFF)
4864       return SDValue();
4865     N1 = N1.getOperand(0);
4866     LookPassAnd1 = true;
4867   }
4868
4869   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4870     std::swap(N0, N1);
4871   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4872     return SDValue();
4873   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4874     return SDValue();
4875
4876   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4877   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4878   if (!N01C || !N11C)
4879     return SDValue();
4880   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4881     return SDValue();
4882
4883   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4884   SDValue N00 = N0->getOperand(0);
4885   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4886     if (!N00.getNode()->hasOneUse())
4887       return SDValue();
4888     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4889     if (!N001C || N001C->getZExtValue() != 0xFF)
4890       return SDValue();
4891     N00 = N00.getOperand(0);
4892     LookPassAnd0 = true;
4893   }
4894
4895   SDValue N10 = N1->getOperand(0);
4896   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4897     if (!N10.getNode()->hasOneUse())
4898       return SDValue();
4899     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4900     // Also allow 0xFFFF since the bits will be shifted out. This is needed
4901     // for X86.
4902     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4903                    N101C->getZExtValue() != 0xFFFF))
4904       return SDValue();
4905     N10 = N10.getOperand(0);
4906     LookPassAnd1 = true;
4907   }
4908
4909   if (N00 != N10)
4910     return SDValue();
4911
4912   // Make sure everything beyond the low halfword gets set to zero since the SRL
4913   // 16 will clear the top bits.
4914   unsigned OpSizeInBits = VT.getSizeInBits();
4915   if (DemandHighBits && OpSizeInBits > 16) {
4916     // If the left-shift isn't masked out then the only way this is a bswap is
4917     // if all bits beyond the low 8 are 0. In that case the entire pattern
4918     // reduces to a left shift anyway: leave it for other parts of the combiner.
4919     if (!LookPassAnd0)
4920       return SDValue();
4921
4922     // However, if the right shift isn't masked out then it might be because
4923     // it's not needed. See if we can spot that too.
4924     if (!LookPassAnd1 &&
4925         !DAG.MaskedValueIsZero(
4926             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4927       return SDValue();
4928   }
4929
4930   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4931   if (OpSizeInBits > 16) {
4932     SDLoc DL(N);
4933     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4934                       DAG.getConstant(OpSizeInBits - 16, DL,
4935                                       getShiftAmountTy(VT)));
4936   }
4937   return Res;
4938 }
4939
4940 /// Return true if the specified node is an element that makes up a 32-bit
4941 /// packed halfword byteswap.
4942 /// ((x & 0x000000ff) << 8) |
4943 /// ((x & 0x0000ff00) >> 8) |
4944 /// ((x & 0x00ff0000) << 8) |
4945 /// ((x & 0xff000000) >> 8)
4946 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4947   if (!N.getNode()->hasOneUse())
4948     return false;
4949
4950   unsigned Opc = N.getOpcode();
4951   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4952     return false;
4953
4954   SDValue N0 = N.getOperand(0);
4955   unsigned Opc0 = N0.getOpcode();
4956   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4957     return false;
4958
4959   ConstantSDNode *N1C = nullptr;
4960   // SHL or SRL: look upstream for AND mask operand
4961   if (Opc == ISD::AND)
4962     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4963   else if (Opc0 == ISD::AND)
4964     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4965   if (!N1C)
4966     return false;
4967
4968   unsigned MaskByteOffset;
4969   switch (N1C->getZExtValue()) {
4970   default:
4971     return false;
4972   case 0xFF:       MaskByteOffset = 0; break;
4973   case 0xFF00:     MaskByteOffset = 1; break;
4974   case 0xFFFF:
4975     // In case demanded bits didn't clear the bits that will be shifted out.
4976     // This is needed for X86.
4977     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4978       MaskByteOffset = 1;
4979       break;
4980     }
4981     return false;
4982   case 0xFF0000:   MaskByteOffset = 2; break;
4983   case 0xFF000000: MaskByteOffset = 3; break;
4984   }
4985
4986   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4987   if (Opc == ISD::AND) {
4988     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4989       // (x >> 8) & 0xff
4990       // (x >> 8) & 0xff0000
4991       if (Opc0 != ISD::SRL)
4992         return false;
4993       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4994       if (!C || C->getZExtValue() != 8)
4995         return false;
4996     } else {
4997       // (x << 8) & 0xff00
4998       // (x << 8) & 0xff000000
4999       if (Opc0 != ISD::SHL)
5000         return false;
5001       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5002       if (!C || C->getZExtValue() != 8)
5003         return false;
5004     }
5005   } else if (Opc == ISD::SHL) {
5006     // (x & 0xff) << 8
5007     // (x & 0xff0000) << 8
5008     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5009       return false;
5010     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5011     if (!C || C->getZExtValue() != 8)
5012       return false;
5013   } else { // Opc == ISD::SRL
5014     // (x & 0xff00) >> 8
5015     // (x & 0xff000000) >> 8
5016     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5017       return false;
5018     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5019     if (!C || C->getZExtValue() != 8)
5020       return false;
5021   }
5022
5023   if (Parts[MaskByteOffset])
5024     return false;
5025
5026   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5027   return true;
5028 }
5029
5030 /// Match a 32-bit packed halfword bswap. That is
5031 /// ((x & 0x000000ff) << 8) |
5032 /// ((x & 0x0000ff00) >> 8) |
5033 /// ((x & 0x00ff0000) << 8) |
5034 /// ((x & 0xff000000) >> 8)
5035 /// => (rotl (bswap x), 16)
5036 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5037   if (!LegalOperations)
5038     return SDValue();
5039
5040   EVT VT = N->getValueType(0);
5041   if (VT != MVT::i32)
5042     return SDValue();
5043   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5044     return SDValue();
5045
5046   // Look for either
5047   // (or (or (and), (and)), (or (and), (and)))
5048   // (or (or (or (and), (and)), (and)), (and))
5049   if (N0.getOpcode() != ISD::OR)
5050     return SDValue();
5051   SDValue N00 = N0.getOperand(0);
5052   SDValue N01 = N0.getOperand(1);
5053   SDNode *Parts[4] = {};
5054
5055   if (N1.getOpcode() == ISD::OR &&
5056       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5057     // (or (or (and), (and)), (or (and), (and)))
5058     if (!isBSwapHWordElement(N00, Parts))
5059       return SDValue();
5060
5061     if (!isBSwapHWordElement(N01, Parts))
5062       return SDValue();
5063     SDValue N10 = N1.getOperand(0);
5064     if (!isBSwapHWordElement(N10, Parts))
5065       return SDValue();
5066     SDValue N11 = N1.getOperand(1);
5067     if (!isBSwapHWordElement(N11, Parts))
5068       return SDValue();
5069   } else {
5070     // (or (or (or (and), (and)), (and)), (and))
5071     if (!isBSwapHWordElement(N1, Parts))
5072       return SDValue();
5073     if (!isBSwapHWordElement(N01, Parts))
5074       return SDValue();
5075     if (N00.getOpcode() != ISD::OR)
5076       return SDValue();
5077     SDValue N000 = N00.getOperand(0);
5078     if (!isBSwapHWordElement(N000, Parts))
5079       return SDValue();
5080     SDValue N001 = N00.getOperand(1);
5081     if (!isBSwapHWordElement(N001, Parts))
5082       return SDValue();
5083   }
5084
5085   // Make sure the parts are all coming from the same node.
5086   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5087     return SDValue();
5088
5089   SDLoc DL(N);
5090   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5091                               SDValue(Parts[0], 0));
5092
5093   // Result of the bswap should be rotated by 16. If it's not legal, then
5094   // do  (x << 16) | (x >> 16).
5095   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5096   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5097     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5098   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5099     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5100   return DAG.getNode(ISD::OR, DL, VT,
5101                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5102                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5103 }
5104
5105 /// This contains all DAGCombine rules which reduce two values combined by
5106 /// an Or operation to a single value \see visitANDLike().
5107 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5108   EVT VT = N1.getValueType();
5109   SDLoc DL(N);
5110
5111   // fold (or x, undef) -> -1
5112   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5113     return DAG.getAllOnesConstant(DL, VT);
5114
5115   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5116     return V;
5117
5118   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5119   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5120       // Don't increase # computations.
5121       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5122     // We can only do this xform if we know that bits from X that are set in C2
5123     // but not in C1 are already zero.  Likewise for Y.
5124     if (const ConstantSDNode *N0O1C =
5125         getAsNonOpaqueConstant(N0.getOperand(1))) {
5126       if (const ConstantSDNode *N1O1C =
5127           getAsNonOpaqueConstant(N1.getOperand(1))) {
5128         // We can only do this xform if we know that bits from X that are set in
5129         // C2 but not in C1 are already zero.  Likewise for Y.
5130         const APInt &LHSMask = N0O1C->getAPIntValue();
5131         const APInt &RHSMask = N1O1C->getAPIntValue();
5132
5133         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5134             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5135           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5136                                   N0.getOperand(0), N1.getOperand(0));
5137           return DAG.getNode(ISD::AND, DL, VT, X,
5138                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5139         }
5140       }
5141     }
5142   }
5143
5144   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5145   if (N0.getOpcode() == ISD::AND &&
5146       N1.getOpcode() == ISD::AND &&
5147       N0.getOperand(0) == N1.getOperand(0) &&
5148       // Don't increase # computations.
5149       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5150     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5151                             N0.getOperand(1), N1.getOperand(1));
5152     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5153   }
5154
5155   return SDValue();
5156 }
5157
5158 SDValue DAGCombiner::visitOR(SDNode *N) {
5159   SDValue N0 = N->getOperand(0);
5160   SDValue N1 = N->getOperand(1);
5161   EVT VT = N1.getValueType();
5162
5163   // x | x --> x
5164   if (N0 == N1)
5165     return N0;
5166
5167   // fold vector ops
5168   if (VT.isVector()) {
5169     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5170       return FoldedVOp;
5171
5172     // fold (or x, 0) -> x, vector edition
5173     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5174       return N1;
5175     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5176       return N0;
5177
5178     // fold (or x, -1) -> -1, vector edition
5179     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5180       // do not return N0, because undef node may exist in N0
5181       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5182     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5183       // do not return N1, because undef node may exist in N1
5184       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5185
5186     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5187     // Do this only if the resulting shuffle is legal.
5188     if (isa<ShuffleVectorSDNode>(N0) &&
5189         isa<ShuffleVectorSDNode>(N1) &&
5190         // Avoid folding a node with illegal type.
5191         TLI.isTypeLegal(VT)) {
5192       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5193       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5194       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5195       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5196       // Ensure both shuffles have a zero input.
5197       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5198         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5199         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5200         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5201         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5202         bool CanFold = true;
5203         int NumElts = VT.getVectorNumElements();
5204         SmallVector<int, 4> Mask(NumElts);
5205
5206         for (int i = 0; i != NumElts; ++i) {
5207           int M0 = SV0->getMaskElt(i);
5208           int M1 = SV1->getMaskElt(i);
5209
5210           // Determine if either index is pointing to a zero vector.
5211           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5212           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5213
5214           // If one element is zero and the otherside is undef, keep undef.
5215           // This also handles the case that both are undef.
5216           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5217             Mask[i] = -1;
5218             continue;
5219           }
5220
5221           // Make sure only one of the elements is zero.
5222           if (M0Zero == M1Zero) {
5223             CanFold = false;
5224             break;
5225           }
5226
5227           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5228
5229           // We have a zero and non-zero element. If the non-zero came from
5230           // SV0 make the index a LHS index. If it came from SV1, make it
5231           // a RHS index. We need to mod by NumElts because we don't care
5232           // which operand it came from in the original shuffles.
5233           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5234         }
5235
5236         if (CanFold) {
5237           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5238           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5239
5240           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5241           if (!LegalMask) {
5242             std::swap(NewLHS, NewRHS);
5243             ShuffleVectorSDNode::commuteMask(Mask);
5244             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5245           }
5246
5247           if (LegalMask)
5248             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5249         }
5250       }
5251     }
5252   }
5253
5254   // fold (or c1, c2) -> c1|c2
5255   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5256   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5257   if (N0C && N1C && !N1C->isOpaque())
5258     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5259   // canonicalize constant to RHS
5260   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5261      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5262     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5263   // fold (or x, 0) -> x
5264   if (isNullConstant(N1))
5265     return N0;
5266   // fold (or x, -1) -> -1
5267   if (isAllOnesConstant(N1))
5268     return N1;
5269
5270   if (SDValue NewSel = foldBinOpIntoSelect(N))
5271     return NewSel;
5272
5273   // fold (or x, c) -> c iff (x & ~c) == 0
5274   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5275     return N1;
5276
5277   if (SDValue Combined = visitORLike(N0, N1, N))
5278     return Combined;
5279
5280   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5281   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5282     return BSwap;
5283   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5284     return BSwap;
5285
5286   // reassociate or
5287   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5288     return ROR;
5289
5290   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5291   // iff (c1 & c2) != 0 or c1/c2 are undef.
5292   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5293     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5294   };
5295   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5296       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5297     if (SDValue COR = DAG.FoldConstantArithmetic(
5298             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5299       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5300       AddToWorklist(IOR.getNode());
5301       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5302     }
5303   }
5304
5305   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5306   if (N0.getOpcode() == N1.getOpcode())
5307     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5308       return V;
5309
5310   // See if this is some rotate idiom.
5311   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5312     return SDValue(Rot, 0);
5313
5314   if (SDValue Load = MatchLoadCombine(N))
5315     return Load;
5316
5317   // Simplify the operands using demanded-bits information.
5318   if (SimplifyDemandedBits(SDValue(N, 0)))
5319     return SDValue(N, 0);
5320
5321   return SDValue();
5322 }
5323
5324 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5325   if (Op.getOpcode() == ISD::AND &&
5326       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5327     Mask = Op.getOperand(1);
5328     return Op.getOperand(0);
5329   }
5330   return Op;
5331 }
5332
5333 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5334 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5335                             SDValue &Mask) {
5336   Op = stripConstantMask(DAG, Op, Mask);
5337   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5338     Shift = Op;
5339     return true;
5340   }
5341   return false;
5342 }
5343
5344 /// Helper function for visitOR to extract the needed side of a rotate idiom
5345 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5346 /// InstCombine merged some outside op with one of the shifts from
5347 /// the rotate pattern.
5348 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5349 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5350 /// patterns:
5351 ///
5352 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5353 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5354 ///
5355 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5356 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5357 ///
5358 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5359 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5360 ///
5361 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5362 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5363 ///
5364 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5365 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5366                                      SDValue ExtractFrom, SDValue &Mask,
5367                                      const SDLoc &DL) {
5368   assert(OppShift && ExtractFrom && "Empty SDValue");
5369   assert(
5370       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5371       "Existing shift must be valid as a rotate half");
5372
5373   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5374   // Preconditions:
5375   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5376   //
5377   // Find opcode of the needed shift to be extracted from (op0 v c0).
5378   unsigned Opcode = ISD::DELETED_NODE;
5379   bool IsMulOrDiv = false;
5380   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5381   // opcode or its arithmetic (mul or udiv) variant.
5382   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5383     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5384     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5385       return false;
5386     Opcode = NeededShift;
5387     return true;
5388   };
5389   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5390   // that the needed shift can be extracted from.
5391   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5392       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5393     return SDValue();
5394
5395   // op0 must be the same opcode on both sides, have the same LHS argument,
5396   // and produce the same value type.
5397   SDValue OppShiftLHS = OppShift.getOperand(0);
5398   EVT ShiftedVT = OppShiftLHS.getValueType();
5399   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5400       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5401       ShiftedVT != ExtractFrom.getValueType())
5402     return SDValue();
5403
5404   // Amount of the existing shift.
5405   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5406   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5407   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5408   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5409   ConstantSDNode *ExtractFromCst =
5410       isConstOrConstSplat(ExtractFrom.getOperand(1));
5411   // TODO: We should be able to handle non-uniform constant vectors for these values
5412   // Check that we have constant values.
5413   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5414       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5415       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5416     return SDValue();
5417
5418   // Compute the shift amount we need to extract to complete the rotate.
5419   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5420   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5421     return SDValue();
5422   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5423   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5424   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5425   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5426   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5427
5428   // Now try extract the needed shift from the ExtractFrom op and see if the
5429   // result matches up with the existing shift's LHS op.
5430   if (IsMulOrDiv) {
5431     // Op to extract from is a mul or udiv by a constant.
5432     // Check:
5433     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5434     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5435     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5436                                                  NeededShiftAmt.getZExtValue());
5437     APInt ResultAmt;
5438     APInt Rem;
5439     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5440     if (Rem != 0 || ResultAmt != OppLHSAmt)
5441       return SDValue();
5442   } else {
5443     // Op to extract from is a shift by a constant.
5444     // Check:
5445     //      c2 - (bitwidth(op0 v c0) - c1) == c0
5446     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5447                                           ExtractFromAmt.getBitWidth()))
5448       return SDValue();
5449   }
5450
5451   // Return the expanded shift op that should allow a rotate to be formed.
5452   EVT ShiftVT = OppShift.getOperand(1).getValueType();
5453   EVT ResVT = ExtractFrom.getValueType();
5454   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5455   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5456 }
5457
5458 // Return true if we can prove that, whenever Neg and Pos are both in the
5459 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5460 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5461 //
5462 //     (or (shift1 X, Neg), (shift2 X, Pos))
5463 //
5464 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5465 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
5466 // to consider shift amounts with defined behavior.
5467 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5468                            SelectionDAG &DAG) {
5469   // If EltSize is a power of 2 then:
5470   //
5471   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5472   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5473   //
5474   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5475   // for the stronger condition:
5476   //
5477   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
5478   //
5479   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5480   // we can just replace Neg with Neg' for the rest of the function.
5481   //
5482   // In other cases we check for the even stronger condition:
5483   //
5484   //     Neg == EltSize - Pos                                    [B]
5485   //
5486   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
5487   // behavior if Pos == 0 (and consequently Neg == EltSize).
5488   //
5489   // We could actually use [A] whenever EltSize is a power of 2, but the
5490   // only extra cases that it would match are those uninteresting ones
5491   // where Neg and Pos are never in range at the same time.  E.g. for
5492   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5493   // as well as (sub 32, Pos), but:
5494   //
5495   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5496   //
5497   // always invokes undefined behavior for 32-bit X.
5498   //
5499   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5500   unsigned MaskLoBits = 0;
5501   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5502     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5503       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5504       unsigned Bits = Log2_64(EltSize);
5505       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5506           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5507         Neg = Neg.getOperand(0);
5508         MaskLoBits = Bits;
5509       }
5510     }
5511   }
5512
5513   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5514   if (Neg.getOpcode() != ISD::SUB)
5515     return false;
5516   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5517   if (!NegC)
5518     return false;
5519   SDValue NegOp1 = Neg.getOperand(1);
5520
5521   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5522   // Pos'.  The truncation is redundant for the purpose of the equality.
5523   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5524     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5525       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5526       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5527           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5528            MaskLoBits))
5529         Pos = Pos.getOperand(0);
5530     }
5531   }
5532
5533   // The condition we need is now:
5534   //
5535   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5536   //
5537   // If NegOp1 == Pos then we need:
5538   //
5539   //              EltSize & Mask == NegC & Mask
5540   //
5541   // (because "x & Mask" is a truncation and distributes through subtraction).
5542   APInt Width;
5543   if (Pos == NegOp1)
5544     Width = NegC->getAPIntValue();
5545
5546   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5547   // Then the condition we want to prove becomes:
5548   //
5549   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5550   //
5551   // which, again because "x & Mask" is a truncation, becomes:
5552   //
5553   //                NegC & Mask == (EltSize - PosC) & Mask
5554   //             EltSize & Mask == (NegC + PosC) & Mask
5555   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5556     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5557       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5558     else
5559       return false;
5560   } else
5561     return false;
5562
5563   // Now we just need to check that EltSize & Mask == Width & Mask.
5564   if (MaskLoBits)
5565     // EltSize & Mask is 0 since Mask is EltSize - 1.
5566     return Width.getLoBits(MaskLoBits) == 0;
5567   return Width == EltSize;
5568 }
5569
5570 // A subroutine of MatchRotate used once we have found an OR of two opposite
5571 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5572 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5573 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5574 // Neg with outer conversions stripped away.
5575 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5576                                        SDValue Neg, SDValue InnerPos,
5577                                        SDValue InnerNeg, unsigned PosOpcode,
5578                                        unsigned NegOpcode, const SDLoc &DL) {
5579   // fold (or (shl x, (*ext y)),
5580   //          (srl x, (*ext (sub 32, y)))) ->
5581   //   (rotl x, y) or (rotr x, (sub 32, y))
5582   //
5583   // fold (or (shl x, (*ext (sub 32, y))),
5584   //          (srl x, (*ext y))) ->
5585   //   (rotr x, y) or (rotl x, (sub 32, y))
5586   EVT VT = Shifted.getValueType();
5587   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5588     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5589     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5590                        HasPos ? Pos : Neg).getNode();
5591   }
5592
5593   return nullptr;
5594 }
5595
5596 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
5597 // idioms for rotate, and if the target supports rotation instructions, generate
5598 // a rot[lr].
5599 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5600   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
5601   EVT VT = LHS.getValueType();
5602   if (!TLI.isTypeLegal(VT)) return nullptr;
5603
5604   // The target must have at least one rotate flavor.
5605   bool HasROTL = hasOperation(ISD::ROTL, VT);
5606   bool HasROTR = hasOperation(ISD::ROTR, VT);
5607   if (!HasROTL && !HasROTR) return nullptr;
5608
5609   // Check for truncated rotate.
5610   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5611       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5612     assert(LHS.getValueType() == RHS.getValueType());
5613     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5614       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5615                          SDValue(Rot, 0)).getNode();
5616     }
5617   }
5618
5619   // Match "(X shl/srl V1) & V2" where V2 may not be present.
5620   SDValue LHSShift;   // The shift.
5621   SDValue LHSMask;    // AND value if any.
5622   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5623
5624   SDValue RHSShift;   // The shift.
5625   SDValue RHSMask;    // AND value if any.
5626   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5627
5628   // If neither side matched a rotate half, bail
5629   if (!LHSShift && !RHSShift)
5630     return nullptr;
5631
5632   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5633   // side of the rotate, so try to handle that here. In all cases we need to
5634   // pass the matched shift from the opposite side to compute the opcode and
5635   // needed shift amount to extract.  We still want to do this if both sides
5636   // matched a rotate half because one half may be a potential overshift that
5637   // can be broken down (ie if InstCombine merged two shl or srl ops into a
5638   // single one).
5639
5640   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5641   if (LHSShift)
5642     if (SDValue NewRHSShift =
5643             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5644       RHSShift = NewRHSShift;
5645   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5646   if (RHSShift)
5647     if (SDValue NewLHSShift =
5648             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5649       LHSShift = NewLHSShift;
5650
5651   // If a side is still missing, nothing else we can do.
5652   if (!RHSShift || !LHSShift)
5653     return nullptr;
5654
5655   // At this point we've matched or extracted a shift op on each side.
5656
5657   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5658     return nullptr;   // Not shifting the same value.
5659
5660   if (LHSShift.getOpcode() == RHSShift.getOpcode())
5661     return nullptr;   // Shifts must disagree.
5662
5663   // Canonicalize shl to left side in a shl/srl pair.
5664   if (RHSShift.getOpcode() == ISD::SHL) {
5665     std::swap(LHS, RHS);
5666     std::swap(LHSShift, RHSShift);
5667     std::swap(LHSMask, RHSMask);
5668   }
5669
5670   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5671   SDValue LHSShiftArg = LHSShift.getOperand(0);
5672   SDValue LHSShiftAmt = LHSShift.getOperand(1);
5673   SDValue RHSShiftArg = RHSShift.getOperand(0);
5674   SDValue RHSShiftAmt = RHSShift.getOperand(1);
5675
5676   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5677   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
5678   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5679                                         ConstantSDNode *RHS) {
5680     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5681   };
5682   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5683     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5684                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5685
5686     // If there is an AND of either shifted operand, apply it to the result.
5687     if (LHSMask.getNode() || RHSMask.getNode()) {
5688       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5689       SDValue Mask = AllOnes;
5690
5691       if (LHSMask.getNode()) {
5692         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5693         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5694                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5695       }
5696       if (RHSMask.getNode()) {
5697         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5698         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5699                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5700       }
5701
5702       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5703     }
5704
5705     return Rot.getNode();
5706   }
5707
5708   // If there is a mask here, and we have a variable shift, we can't be sure
5709   // that we're masking out the right stuff.
5710   if (LHSMask.getNode() || RHSMask.getNode())
5711     return nullptr;
5712
5713   // If the shift amount is sign/zext/any-extended just peel it off.
5714   SDValue LExtOp0 = LHSShiftAmt;
5715   SDValue RExtOp0 = RHSShiftAmt;
5716   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5717        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5718        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5719        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5720       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5721        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5722        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5723        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5724     LExtOp0 = LHSShiftAmt.getOperand(0);
5725     RExtOp0 = RHSShiftAmt.getOperand(0);
5726   }
5727
5728   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5729                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5730   if (TryL)
5731     return TryL;
5732
5733   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5734                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5735   if (TryR)
5736     return TryR;
5737
5738   return nullptr;
5739 }
5740
5741 namespace {
5742
5743 /// Represents known origin of an individual byte in load combine pattern. The
5744 /// value of the byte is either constant zero or comes from memory.
5745 struct ByteProvider {
5746   // For constant zero providers Load is set to nullptr. For memory providers
5747   // Load represents the node which loads the byte from memory.
5748   // ByteOffset is the offset of the byte in the value produced by the load.
5749   LoadSDNode *Load = nullptr;
5750   unsigned ByteOffset = 0;
5751
5752   ByteProvider() = default;
5753
5754   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5755     return ByteProvider(Load, ByteOffset);
5756   }
5757
5758   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5759
5760   bool isConstantZero() const { return !Load; }
5761   bool isMemory() const { return Load; }
5762
5763   bool operator==(const ByteProvider &Other) const {
5764     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5765   }
5766
5767 private:
5768   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5769       : Load(Load), ByteOffset(ByteOffset) {}
5770 };
5771
5772 } // end anonymous namespace
5773
5774 /// Recursively traverses the expression calculating the origin of the requested
5775 /// byte of the given value. Returns None if the provider can't be calculated.
5776 ///
5777 /// For all the values except the root of the expression verifies that the value
5778 /// has exactly one use and if it's not true return None. This way if the origin
5779 /// of the byte is returned it's guaranteed that the values which contribute to
5780 /// the byte are not used outside of this expression.
5781 ///
5782 /// Because the parts of the expression are not allowed to have more than one
5783 /// use this function iterates over trees, not DAGs. So it never visits the same
5784 /// node more than once.
5785 static const Optional<ByteProvider>
5786 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5787                       bool Root = false) {
5788   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5789   if (Depth == 10)
5790     return None;
5791
5792   if (!Root && !Op.hasOneUse())
5793     return None;
5794
5795   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5796   unsigned BitWidth = Op.getValueSizeInBits();
5797   if (BitWidth % 8 != 0)
5798     return None;
5799   unsigned ByteWidth = BitWidth / 8;
5800   assert(Index < ByteWidth && "invalid index requested");
5801   (void) ByteWidth;
5802
5803   switch (Op.getOpcode()) {
5804   case ISD::OR: {
5805     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5806     if (!LHS)
5807       return None;
5808     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5809     if (!RHS)
5810       return None;
5811
5812     if (LHS->isConstantZero())
5813       return RHS;
5814     if (RHS->isConstantZero())
5815       return LHS;
5816     return None;
5817   }
5818   case ISD::SHL: {
5819     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5820     if (!ShiftOp)
5821       return None;
5822
5823     uint64_t BitShift = ShiftOp->getZExtValue();
5824     if (BitShift % 8 != 0)
5825       return None;
5826     uint64_t ByteShift = BitShift / 8;
5827
5828     return Index < ByteShift
5829                ? ByteProvider::getConstantZero()
5830                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5831                                        Depth + 1);
5832   }
5833   case ISD::ANY_EXTEND:
5834   case ISD::SIGN_EXTEND:
5835   case ISD::ZERO_EXTEND: {
5836     SDValue NarrowOp = Op->getOperand(0);
5837     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5838     if (NarrowBitWidth % 8 != 0)
5839       return None;
5840     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5841
5842     if (Index >= NarrowByteWidth)
5843       return Op.getOpcode() == ISD::ZERO_EXTEND
5844                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5845                  : None;
5846     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5847   }
5848   case ISD::BSWAP:
5849     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5850                                  Depth + 1);
5851   case ISD::LOAD: {
5852     auto L = cast<LoadSDNode>(Op.getNode());
5853     if (L->isVolatile() || L->isIndexed())
5854       return None;
5855
5856     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5857     if (NarrowBitWidth % 8 != 0)
5858       return None;
5859     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5860
5861     if (Index >= NarrowByteWidth)
5862       return L->getExtensionType() == ISD::ZEXTLOAD
5863                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5864                  : None;
5865     return ByteProvider::getMemory(L, Index);
5866   }
5867   }
5868
5869   return None;
5870 }
5871
5872 /// Match a pattern where a wide type scalar value is loaded by several narrow
5873 /// loads and combined by shifts and ors. Fold it into a single load or a load
5874 /// and a BSWAP if the target supports it.
5875 ///
5876 /// Assuming little endian target:
5877 ///  i8 *a = ...
5878 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5879 /// =>
5880 ///  i32 val = *((i32)a)
5881 ///
5882 ///  i8 *a = ...
5883 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5884 /// =>
5885 ///  i32 val = BSWAP(*((i32)a))
5886 ///
5887 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5888 /// interact well with the worklist mechanism. When a part of the pattern is
5889 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5890 /// but the root node of the pattern which triggers the load combine is not
5891 /// necessarily a direct user of the changed node. For example, once the address
5892 /// of t28 load is reassociated load combine won't be triggered:
5893 ///             t25: i32 = add t4, Constant:i32<2>
5894 ///           t26: i64 = sign_extend t25
5895 ///        t27: i64 = add t2, t26
5896 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5897 ///     t29: i32 = zero_extend t28
5898 ///   t32: i32 = shl t29, Constant:i8<8>
5899 /// t33: i32 = or t23, t32
5900 /// As a possible fix visitLoad can check if the load can be a part of a load
5901 /// combine pattern and add corresponding OR roots to the worklist.
5902 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5903   assert(N->getOpcode() == ISD::OR &&
5904          "Can only match load combining against OR nodes");
5905
5906   // Handles simple types only
5907   EVT VT = N->getValueType(0);
5908   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5909     return SDValue();
5910   unsigned ByteWidth = VT.getSizeInBits() / 8;
5911
5912   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5913   // Before legalize we can introduce too wide illegal loads which will be later
5914   // split into legal sized loads. This enables us to combine i64 load by i8
5915   // patterns to a couple of i32 loads on 32 bit targets.
5916   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5917     return SDValue();
5918
5919   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5920     unsigned BW, unsigned i) { return i; };
5921   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5922     unsigned BW, unsigned i) { return BW - i - 1; };
5923
5924   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5925   auto MemoryByteOffset = [&] (ByteProvider P) {
5926     assert(P.isMemory() && "Must be a memory byte provider");
5927     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5928     assert(LoadBitWidth % 8 == 0 &&
5929            "can only analyze providers for individual bytes not bit");
5930     unsigned LoadByteWidth = LoadBitWidth / 8;
5931     return IsBigEndianTarget
5932             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5933             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5934   };
5935
5936   Optional<BaseIndexOffset> Base;
5937   SDValue Chain;
5938
5939   SmallPtrSet<LoadSDNode *, 8> Loads;
5940   Optional<ByteProvider> FirstByteProvider;
5941   int64_t FirstOffset = INT64_MAX;
5942
5943   // Check if all the bytes of the OR we are looking at are loaded from the same
5944   // base address. Collect bytes offsets from Base address in ByteOffsets.
5945   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5946   for (unsigned i = 0; i < ByteWidth; i++) {
5947     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5948     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5949       return SDValue();
5950
5951     LoadSDNode *L = P->Load;
5952     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5953            "Must be enforced by calculateByteProvider");
5954     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5955
5956     // All loads must share the same chain
5957     SDValue LChain = L->getChain();
5958     if (!Chain)
5959       Chain = LChain;
5960     else if (Chain != LChain)
5961       return SDValue();
5962
5963     // Loads must share the same base address
5964     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5965     int64_t ByteOffsetFromBase = 0;
5966     if (!Base)
5967       Base = Ptr;
5968     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5969       return SDValue();
5970
5971     // Calculate the offset of the current byte from the base address
5972     ByteOffsetFromBase += MemoryByteOffset(*P);
5973     ByteOffsets[i] = ByteOffsetFromBase;
5974
5975     // Remember the first byte load
5976     if (ByteOffsetFromBase < FirstOffset) {
5977       FirstByteProvider = P;
5978       FirstOffset = ByteOffsetFromBase;
5979     }
5980
5981     Loads.insert(L);
5982   }
5983   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5984          "memory, so there must be at least one load which produces the value");
5985   assert(Base && "Base address of the accessed memory location must be set");
5986   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5987
5988   // Check if the bytes of the OR we are looking at match with either big or
5989   // little endian value load
5990   bool BigEndian = true, LittleEndian = true;
5991   for (unsigned i = 0; i < ByteWidth; i++) {
5992     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5993     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5994     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5995     if (!BigEndian && !LittleEndian)
5996       return SDValue();
5997   }
5998   assert((BigEndian != LittleEndian) && "should be either or");
5999   assert(FirstByteProvider && "must be set");
6000
6001   // Ensure that the first byte is loaded from zero offset of the first load.
6002   // So the combined value can be loaded from the first load address.
6003   if (MemoryByteOffset(*FirstByteProvider) != 0)
6004     return SDValue();
6005   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6006
6007   // The node we are looking at matches with the pattern, check if we can
6008   // replace it with a single load and bswap if needed.
6009
6010   // If the load needs byte swap check if the target supports it
6011   bool NeedsBswap = IsBigEndianTarget != BigEndian;
6012
6013   // Before legalize we can introduce illegal bswaps which will be later
6014   // converted to an explicit bswap sequence. This way we end up with a single
6015   // load and byte shuffling instead of several loads and byte shuffling.
6016   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6017     return SDValue();
6018
6019   // Check that a load of the wide type is both allowed and fast on the target
6020   bool Fast = false;
6021   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6022                                         VT, FirstLoad->getAddressSpace(),
6023                                         FirstLoad->getAlignment(), &Fast);
6024   if (!Allowed || !Fast)
6025     return SDValue();
6026
6027   SDValue NewLoad =
6028       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6029                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6030
6031   // Transfer chain users from old loads to the new load.
6032   for (LoadSDNode *L : Loads)
6033     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6034
6035   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6036 }
6037
6038 // If the target has andn, bsl, or a similar bit-select instruction,
6039 // we want to unfold masked merge, with canonical pattern of:
6040 //   |        A  |  |B|
6041 //   ((x ^ y) & m) ^ y
6042 //    |  D  |
6043 // Into:
6044 //   (x & m) | (y & ~m)
6045 // If y is a constant, and the 'andn' does not work with immediates,
6046 // we unfold into a different pattern:
6047 //   ~(~x & m) & (m | y)
6048 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6049 //       the very least that breaks andnpd / andnps patterns, and because those
6050 //       patterns are simplified in IR and shouldn't be created in the DAG
6051 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6052   assert(N->getOpcode() == ISD::XOR);
6053
6054   // Don't touch 'not' (i.e. where y = -1).
6055   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6056     return SDValue();
6057
6058   EVT VT = N->getValueType(0);
6059
6060   // There are 3 commutable operators in the pattern,
6061   // so we have to deal with 8 possible variants of the basic pattern.
6062   SDValue X, Y, M;
6063   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6064     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6065       return false;
6066     SDValue Xor = And.getOperand(XorIdx);
6067     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6068       return false;
6069     SDValue Xor0 = Xor.getOperand(0);
6070     SDValue Xor1 = Xor.getOperand(1);
6071     // Don't touch 'not' (i.e. where y = -1).
6072     if (isAllOnesOrAllOnesSplat(Xor1))
6073       return false;
6074     if (Other == Xor0)
6075       std::swap(Xor0, Xor1);
6076     if (Other != Xor1)
6077       return false;
6078     X = Xor0;
6079     Y = Xor1;
6080     M = And.getOperand(XorIdx ? 0 : 1);
6081     return true;
6082   };
6083
6084   SDValue N0 = N->getOperand(0);
6085   SDValue N1 = N->getOperand(1);
6086   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6087       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6088     return SDValue();
6089
6090   // Don't do anything if the mask is constant. This should not be reachable.
6091   // InstCombine should have already unfolded this pattern, and DAGCombiner
6092   // probably shouldn't produce it, too.
6093   if (isa<ConstantSDNode>(M.getNode()))
6094     return SDValue();
6095
6096   // We can transform if the target has AndNot
6097   if (!TLI.hasAndNot(M))
6098     return SDValue();
6099
6100   SDLoc DL(N);
6101
6102   // If Y is a constant, check that 'andn' works with immediates.
6103   if (!TLI.hasAndNot(Y)) {
6104     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6105     // If not, we need to do a bit more work to make sure andn is still used.
6106     SDValue NotX = DAG.getNOT(DL, X, VT);
6107     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6108     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6109     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6110     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6111   }
6112
6113   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6114   SDValue NotM = DAG.getNOT(DL, M, VT);
6115   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6116
6117   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6118 }
6119
6120 SDValue DAGCombiner::visitXOR(SDNode *N) {
6121   SDValue N0 = N->getOperand(0);
6122   SDValue N1 = N->getOperand(1);
6123   EVT VT = N0.getValueType();
6124
6125   // fold vector ops
6126   if (VT.isVector()) {
6127     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6128       return FoldedVOp;
6129
6130     // fold (xor x, 0) -> x, vector edition
6131     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6132       return N1;
6133     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6134       return N0;
6135   }
6136
6137   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6138   SDLoc DL(N);
6139   if (N0.isUndef() && N1.isUndef())
6140     return DAG.getConstant(0, DL, VT);
6141   // fold (xor x, undef) -> undef
6142   if (N0.isUndef())
6143     return N0;
6144   if (N1.isUndef())
6145     return N1;
6146   // fold (xor c1, c2) -> c1^c2
6147   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6148   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6149   if (N0C && N1C)
6150     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6151   // canonicalize constant to RHS
6152   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6153      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6154     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6155   // fold (xor x, 0) -> x
6156   if (isNullConstant(N1))
6157     return N0;
6158
6159   if (SDValue NewSel = foldBinOpIntoSelect(N))
6160     return NewSel;
6161
6162   // reassociate xor
6163   if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6164     return RXOR;
6165
6166   // fold !(x cc y) -> (x !cc y)
6167   unsigned N0Opcode = N0.getOpcode();
6168   SDValue LHS, RHS, CC;
6169   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6170     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6171                                                LHS.getValueType().isInteger());
6172     if (!LegalOperations ||
6173         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6174       switch (N0Opcode) {
6175       default:
6176         llvm_unreachable("Unhandled SetCC Equivalent!");
6177       case ISD::SETCC:
6178         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6179       case ISD::SELECT_CC:
6180         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6181                                N0.getOperand(3), NotCC);
6182       }
6183     }
6184   }
6185
6186   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6187   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6188       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6189     SDValue V = N0.getOperand(0);
6190     SDLoc DL0(N0);
6191     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6192                     DAG.getConstant(1, DL0, V.getValueType()));
6193     AddToWorklist(V.getNode());
6194     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6195   }
6196
6197   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6198   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6199       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6200     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6201     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6202       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6203       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6204       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6205       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6206       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6207     }
6208   }
6209   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6210   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6211       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6212     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6213     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6214       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6215       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6216       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6217       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6218       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6219     }
6220   }
6221   // fold (xor (and x, y), y) -> (and (not x), y)
6222   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6223     SDValue X = N0.getOperand(0);
6224     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6225     AddToWorklist(NotX.getNode());
6226     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6227   }
6228
6229   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6230     ConstantSDNode *XorC = isConstOrConstSplat(N1);
6231     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6232     unsigned BitWidth = VT.getScalarSizeInBits();
6233     if (XorC && ShiftC) {
6234       // Don't crash on an oversized shift. We can not guarantee that a bogus
6235       // shift has been simplified to undef.
6236       uint64_t ShiftAmt = ShiftC->getLimitedValue();
6237       if (ShiftAmt < BitWidth) {
6238         APInt Ones = APInt::getAllOnesValue(BitWidth);
6239         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6240         if (XorC->getAPIntValue() == Ones) {
6241           // If the xor constant is a shifted -1, do a 'not' before the shift:
6242           // xor (X << ShiftC), XorC --> (not X) << ShiftC
6243           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6244           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6245           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6246         }
6247       }
6248     }
6249   }
6250
6251   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6252   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6253     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6254     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6255     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6256       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6257       SDValue S0 = S.getOperand(0);
6258       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6259         unsigned OpSizeInBits = VT.getScalarSizeInBits();
6260         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6261           if (C->getAPIntValue() == (OpSizeInBits - 1))
6262             return DAG.getNode(ISD::ABS, DL, VT, S0);
6263       }
6264     }
6265   }
6266
6267   // fold (xor x, x) -> 0
6268   if (N0 == N1)
6269     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6270
6271   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6272   // Here is a concrete example of this equivalence:
6273   // i16   x ==  14
6274   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
6275   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6276   //
6277   // =>
6278   //
6279   // i16     ~1      == 0b1111111111111110
6280   // i16 rol(~1, 14) == 0b1011111111111111
6281   //
6282   // Some additional tips to help conceptualize this transform:
6283   // - Try to see the operation as placing a single zero in a value of all ones.
6284   // - There exists no value for x which would allow the result to contain zero.
6285   // - Values of x larger than the bitwidth are undefined and do not require a
6286   //   consistent result.
6287   // - Pushing the zero left requires shifting one bits in from the right.
6288   // A rotate left of ~1 is a nice way of achieving the desired result.
6289   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6290       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6291     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6292                        N0.getOperand(1));
6293   }
6294
6295   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
6296   if (N0Opcode == N1.getOpcode())
6297     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6298       return V;
6299
6300   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
6301   if (SDValue MM = unfoldMaskedMerge(N))
6302     return MM;
6303
6304   // Simplify the expression using non-local knowledge.
6305   if (SimplifyDemandedBits(SDValue(N, 0)))
6306     return SDValue(N, 0);
6307
6308   return SDValue();
6309 }
6310
6311 /// Handle transforms common to the three shifts, when the shift amount is a
6312 /// constant.
6313 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6314   // Do not turn a 'not' into a regular xor.
6315   if (isBitwiseNot(N->getOperand(0)))
6316     return SDValue();
6317
6318   SDNode *LHS = N->getOperand(0).getNode();
6319   if (!LHS->hasOneUse()) return SDValue();
6320
6321   // We want to pull some binops through shifts, so that we have (and (shift))
6322   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
6323   // thing happens with address calculations, so it's important to canonicalize
6324   // it.
6325   bool HighBitSet = false;  // Can we transform this if the high bit is set?
6326
6327   switch (LHS->getOpcode()) {
6328   default: return SDValue();
6329   case ISD::OR:
6330   case ISD::XOR:
6331     HighBitSet = false; // We can only transform sra if the high bit is clear.
6332     break;
6333   case ISD::AND:
6334     HighBitSet = true;  // We can only transform sra if the high bit is set.
6335     break;
6336   case ISD::ADD:
6337     if (N->getOpcode() != ISD::SHL)
6338       return SDValue(); // only shl(add) not sr[al](add).
6339     HighBitSet = false; // We can only transform sra if the high bit is clear.
6340     break;
6341   }
6342
6343   // We require the RHS of the binop to be a constant and not opaque as well.
6344   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6345   if (!BinOpCst) return SDValue();
6346
6347   // FIXME: disable this unless the input to the binop is a shift by a constant
6348   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
6349   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6350   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6351                  BinOpLHSVal->getOpcode() == ISD::SRA ||
6352                  BinOpLHSVal->getOpcode() == ISD::SRL;
6353   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6354                         BinOpLHSVal->getOpcode() == ISD::SELECT;
6355
6356   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6357       !isCopyOrSelect)
6358     return SDValue();
6359
6360   if (isCopyOrSelect && N->hasOneUse())
6361     return SDValue();
6362
6363   EVT VT = N->getValueType(0);
6364
6365   // If this is a signed shift right, and the high bit is modified by the
6366   // logical operation, do not perform the transformation. The highBitSet
6367   // boolean indicates the value of the high bit of the constant which would
6368   // cause it to be modified for this operation.
6369   if (N->getOpcode() == ISD::SRA) {
6370     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6371     if (BinOpRHSSignSet != HighBitSet)
6372       return SDValue();
6373   }
6374
6375   if (!TLI.isDesirableToCommuteWithShift(N, Level))
6376     return SDValue();
6377
6378   // Fold the constants, shifting the binop RHS by the shift amount.
6379   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6380                                N->getValueType(0),
6381                                LHS->getOperand(1), N->getOperand(1));
6382   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6383
6384   // Create the new shift.
6385   SDValue NewShift = DAG.getNode(N->getOpcode(),
6386                                  SDLoc(LHS->getOperand(0)),
6387                                  VT, LHS->getOperand(0), N->getOperand(1));
6388
6389   // Create the new binop.
6390   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6391 }
6392
6393 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6394   assert(N->getOpcode() == ISD::TRUNCATE);
6395   assert(N->getOperand(0).getOpcode() == ISD::AND);
6396
6397   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6398   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6399     SDValue N01 = N->getOperand(0).getOperand(1);
6400     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6401       SDLoc DL(N);
6402       EVT TruncVT = N->getValueType(0);
6403       SDValue N00 = N->getOperand(0).getOperand(0);
6404       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6405       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6406       AddToWorklist(Trunc00.getNode());
6407       AddToWorklist(Trunc01.getNode());
6408       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6409     }
6410   }
6411
6412   return SDValue();
6413 }
6414
6415 SDValue DAGCombiner::visitRotate(SDNode *N) {
6416   SDLoc dl(N);
6417   SDValue N0 = N->getOperand(0);
6418   SDValue N1 = N->getOperand(1);
6419   EVT VT = N->getValueType(0);
6420   unsigned Bitsize = VT.getScalarSizeInBits();
6421
6422   // fold (rot x, 0) -> x
6423   if (isNullOrNullSplat(N1))
6424     return N0;
6425
6426   // fold (rot x, c) -> x iff (c % BitSize) == 0
6427   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6428     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6429     if (DAG.MaskedValueIsZero(N1, ModuloMask))
6430       return N0;
6431   }
6432
6433   // fold (rot x, c) -> (rot x, c % BitSize)
6434   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6435     if (Cst->getAPIntValue().uge(Bitsize)) {
6436       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6437       return DAG.getNode(N->getOpcode(), dl, VT, N0,
6438                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
6439     }
6440   }
6441
6442   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6443   if (N1.getOpcode() == ISD::TRUNCATE &&
6444       N1.getOperand(0).getOpcode() == ISD::AND) {
6445     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6446       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6447   }
6448
6449   unsigned NextOp = N0.getOpcode();
6450   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
6451   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6452     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6453     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6454     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6455       EVT ShiftVT = C1->getValueType(0);
6456       bool SameSide = (N->getOpcode() == NextOp);
6457       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6458       if (SDValue CombinedShift =
6459               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6460         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6461         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6462             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6463             BitsizeC.getNode());
6464         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6465                            CombinedShiftNorm);
6466       }
6467     }
6468   }
6469   return SDValue();
6470 }
6471
6472 SDValue DAGCombiner::visitSHL(SDNode *N) {
6473   SDValue N0 = N->getOperand(0);
6474   SDValue N1 = N->getOperand(1);
6475   if (SDValue V = DAG.simplifyShift(N0, N1))
6476     return V;
6477
6478   EVT VT = N0.getValueType();
6479   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6480
6481   // fold vector ops
6482   if (VT.isVector()) {
6483     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6484       return FoldedVOp;
6485
6486     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6487     // If setcc produces all-one true value then:
6488     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6489     if (N1CV && N1CV->isConstant()) {
6490       if (N0.getOpcode() == ISD::AND) {
6491         SDValue N00 = N0->getOperand(0);
6492         SDValue N01 = N0->getOperand(1);
6493         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6494
6495         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6496             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6497                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6498           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6499                                                      N01CV, N1CV))
6500             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6501         }
6502       }
6503     }
6504   }
6505
6506   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6507
6508   // fold (shl c1, c2) -> c1<<c2
6509   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6510   if (N0C && N1C && !N1C->isOpaque())
6511     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6512
6513   if (SDValue NewSel = foldBinOpIntoSelect(N))
6514     return NewSel;
6515
6516   // if (shl x, c) is known to be zero, return 0
6517   if (DAG.MaskedValueIsZero(SDValue(N, 0),
6518                             APInt::getAllOnesValue(OpSizeInBits)))
6519     return DAG.getConstant(0, SDLoc(N), VT);
6520   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6521   if (N1.getOpcode() == ISD::TRUNCATE &&
6522       N1.getOperand(0).getOpcode() == ISD::AND) {
6523     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6524       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6525   }
6526
6527   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6528     return SDValue(N, 0);
6529
6530   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
6531   if (N0.getOpcode() == ISD::SHL) {
6532     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6533                                           ConstantSDNode *RHS) {
6534       APInt c1 = LHS->getAPIntValue();
6535       APInt c2 = RHS->getAPIntValue();
6536       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6537       return (c1 + c2).uge(OpSizeInBits);
6538     };
6539     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6540       return DAG.getConstant(0, SDLoc(N), VT);
6541
6542     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6543                                        ConstantSDNode *RHS) {
6544       APInt c1 = LHS->getAPIntValue();
6545       APInt c2 = RHS->getAPIntValue();
6546       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6547       return (c1 + c2).ult(OpSizeInBits);
6548     };
6549     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6550       SDLoc DL(N);
6551       EVT ShiftVT = N1.getValueType();
6552       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6553       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6554     }
6555   }
6556
6557   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6558   // For this to be valid, the second form must not preserve any of the bits
6559   // that are shifted out by the inner shift in the first form.  This means
6560   // the outer shift size must be >= the number of bits added by the ext.
6561   // As a corollary, we don't care what kind of ext it is.
6562   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6563               N0.getOpcode() == ISD::ANY_EXTEND ||
6564               N0.getOpcode() == ISD::SIGN_EXTEND) &&
6565       N0.getOperand(0).getOpcode() == ISD::SHL) {
6566     SDValue N0Op0 = N0.getOperand(0);
6567     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6568       APInt c1 = N0Op0C1->getAPIntValue();
6569       APInt c2 = N1C->getAPIntValue();
6570       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6571
6572       EVT InnerShiftVT = N0Op0.getValueType();
6573       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6574       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6575         SDLoc DL(N0);
6576         APInt Sum = c1 + c2;
6577         if (Sum.uge(OpSizeInBits))
6578           return DAG.getConstant(0, DL, VT);
6579
6580         return DAG.getNode(
6581             ISD::SHL, DL, VT,
6582             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6583             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6584       }
6585     }
6586   }
6587
6588   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6589   // Only fold this if the inner zext has no other uses to avoid increasing
6590   // the total number of instructions.
6591   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6592       N0.getOperand(0).getOpcode() == ISD::SRL) {
6593     SDValue N0Op0 = N0.getOperand(0);
6594     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6595       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6596         uint64_t c1 = N0Op0C1->getZExtValue();
6597         uint64_t c2 = N1C->getZExtValue();
6598         if (c1 == c2) {
6599           SDValue NewOp0 = N0.getOperand(0);
6600           EVT CountVT = NewOp0.getOperand(1).getValueType();
6601           SDLoc DL(N);
6602           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6603                                        NewOp0,
6604                                        DAG.getConstant(c2, DL, CountVT));
6605           AddToWorklist(NewSHL.getNode());
6606           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6607         }
6608       }
6609     }
6610   }
6611
6612   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
6613   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
6614   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6615       N0->getFlags().hasExact()) {
6616     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6617       uint64_t C1 = N0C1->getZExtValue();
6618       uint64_t C2 = N1C->getZExtValue();
6619       SDLoc DL(N);
6620       if (C1 <= C2)
6621         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6622                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6623       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6624                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6625     }
6626   }
6627
6628   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
6629   //                               (and (srl x, (sub c1, c2), MASK)
6630   // Only fold this if the inner shift has no other uses -- if it does, folding
6631   // this will increase the total number of instructions.
6632   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6633       TLI.shouldFoldShiftPairToMask(N, Level)) {
6634     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6635       uint64_t c1 = N0C1->getZExtValue();
6636       if (c1 < OpSizeInBits) {
6637         uint64_t c2 = N1C->getZExtValue();
6638         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6639         SDValue Shift;
6640         if (c2 > c1) {
6641           Mask <<= c2 - c1;
6642           SDLoc DL(N);
6643           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6644                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6645         } else {
6646           Mask.lshrInPlace(c1 - c2);
6647           SDLoc DL(N);
6648           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6649                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6650         }
6651         SDLoc DL(N0);
6652         return DAG.getNode(ISD::AND, DL, VT, Shift,
6653                            DAG.getConstant(Mask, DL, VT));
6654       }
6655     }
6656   }
6657
6658   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6659   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6660       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6661     SDLoc DL(N);
6662     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6663     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6664     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6665   }
6666
6667   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6668   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6669   // Variant of version done on multiply, except mul by a power of 2 is turned
6670   // into a shift.
6671   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6672       N0.getNode()->hasOneUse() &&
6673       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6674       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6675       TLI.isDesirableToCommuteWithShift(N, Level)) {
6676     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6677     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6678     AddToWorklist(Shl0.getNode());
6679     AddToWorklist(Shl1.getNode());
6680     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6681   }
6682
6683   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6684   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6685       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6686       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6687     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6688     if (isConstantOrConstantVector(Shl))
6689       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6690   }
6691
6692   if (N1C && !N1C->isOpaque())
6693     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6694       return NewSHL;
6695
6696   return SDValue();
6697 }
6698
/// Combine an ISD::SRA (arithmetic shift right) node.
///
/// The folds below are attempted strictly in the order written and the first
/// one that applies wins.
///
/// \param N the SRA node; operand 0 is the value being shifted and operand 1
///          is the shift amount.
/// \returns the replacement value when a fold applies, SDValue(N, 0) when
///          SimplifyDemandedBits reports that it changed something, or an
///          empty SDValue when nothing matched.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Let the DAG try its generic shift simplifications first.
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Non-null when the shift amount is a constant or a constant splat.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >>s c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // Number of low bits of x that survive the shl/sra pair.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    // Before operation legalization any type is accepted; afterwards
    // SIGN_EXTEND_INREG must be legal for ExtVT.
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    // One combined shift amount per matched constant pair (a single entry for
    // the scalar case).
    SmallVector<SDValue, 16> ShiftValues;

    // Predicate for matchBinaryPredicate: always succeeds, and records the
    // clamped sum of the two shift amounts as a side effect.
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Bring both amounts to a common width with one spare bit so that the
      // addition below cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      // A combined amount of OpSizeInBits or more is clamped to
      // OpSizeInBits - 1.
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  // NOTE(review): the pattern above says "shl" for the residual shift, but the
  // code below emits ISD::SRL, and only when the residual amount is positive.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the constant amount of the inner shl (N01C = m); N1C is the amount
    // of the outer sra.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // The inner shift may be either srl or sra; both the inner shift and its
  // shift-amount operand must have a single use.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // c1 must equal the number of bits dropped by the truncate.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Last resort: the combines shared by all shifts with a non-opaque constant
  // amount.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
6857
/// Combine an ISD::SRL (logical shift right) node.
///
/// The folds below are attempted strictly in the order written and the first
/// one that applies wins.
///
/// \param N the SRL node; operand 0 is the value being shifted and operand 1
///          is the shift amount.
/// \returns the replacement value when a fold applies, SDValue(N, 0) when
///          SimplifyDemandedBits reports that it changed something, or an
///          empty SDValue when nothing matched. In the last case a BRCOND
///          user of N may still have been queued on the worklist.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Let the DAG try its generic shift simplifications first.
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Non-null when the shift amount is a constant or a constant splat.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // True when the combined shift amount is at least the element width.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Common width plus one spare bit so that the addition cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // True when the combined shift amount is still a valid shift amount.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        // The combined shift discards every bit of x.
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // where cst2 is built below as (srl all-ones, c).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    // Before type legalization any narrow type is accepted; afterwards the
    // target must consider SmallVT desirable for SRL.
    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Keep only the low (OpSizeInBits - ShiftAmt) bits of the re-extended
      // value.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  // ("31" stands for OpSizeInBits - 1.)
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  // ("5" stands for Log2_32(OpSizeInBits).)
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      // Move the only possibly-set bit down to bit 0 (nothing to do when it
      // already is bit 0).
      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // The combines shared by all shifts with a non-opaque constant amount.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of the SRL has been optimized into an
  // AND, the SRL itself may not be optimized any further. Look for that case
  // and add the BRCOND to the worklist so that it gets revisited.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
7066
7067 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7068   EVT VT = N->getValueType(0);
7069   SDValue N0 = N->getOperand(0);
7070   SDValue N1 = N->getOperand(1);
7071   SDValue N2 = N->getOperand(2);
7072   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7073   unsigned BitWidth = VT.getScalarSizeInBits();
7074
7075   // fold (fshl N0, N1, 0) -> N0
7076   // fold (fshr N0, N1, 0) -> N1
7077   if (isPowerOf2_32(BitWidth))
7078     if (DAG.MaskedValueIsZero(
7079             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7080       return IsFSHL ? N0 : N1;
7081
7082   // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7083   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7084     if (Cst->getAPIntValue().uge(BitWidth)) {
7085       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7086       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7087                          DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
7088     }
7089   }
7090
7091   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7092   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7093   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7094   // is legal as well we might be better off avoiding non-constant (BW - N2).
7095   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7096   if (N0 == N1 && hasOperation(RotOpc, VT))
7097     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7098
7099   return SDValue();
7100 }
7101
7102 SDValue DAGCombiner::visitABS(SDNode *N) {
7103   SDValue N0 = N->getOperand(0);
7104   EVT VT = N->getValueType(0);
7105
7106   // fold (abs c1) -> c2
7107   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7108     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7109   // fold (abs (abs x)) -> (abs x)
7110   if (N0.getOpcode() == ISD::ABS)
7111     return N0;
7112   // fold (abs x) -> x iff not-negative
7113   if (DAG.SignBitIsZero(N0))
7114     return N0;
7115   return SDValue();
7116 }
7117
7118 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7119   SDValue N0 = N->getOperand(0);
7120   EVT VT = N->getValueType(0);
7121
7122   // fold (bswap c1) -> c2
7123   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7124     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7125   // fold (bswap (bswap x)) -> x
7126   if (N0.getOpcode() == ISD::BSWAP)
7127     return N0->getOperand(0);
7128   return SDValue();
7129 }
7130
7131 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7132   SDValue N0 = N->getOperand(0);
7133   EVT VT = N->getValueType(0);
7134
7135   // fold (bitreverse c1) -> c2
7136   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7137     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7138   // fold (bitreverse (bitreverse x)) -> x
7139   if (N0.getOpcode() == ISD::BITREVERSE)
7140     return N0.getOperand(0);
7141   return SDValue();
7142 }
7143
7144 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7145   SDValue N0 = N->getOperand(0);
7146   EVT VT = N->getValueType(0);
7147
7148   // fold (ctlz c1) -> c2
7149   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7150     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7151
7152   // If the value is known never to be zero, switch to the undef version.
7153   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7154     if (DAG.isKnownNeverZero(N0))
7155       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7156   }
7157
7158   return SDValue();
7159 }
7160
7161 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7162   SDValue N0 = N->getOperand(0);
7163   EVT VT = N->getValueType(0);
7164
7165   // fold (ctlz_zero_undef c1) -> c2
7166   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7167     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7168   return SDValue();
7169 }
7170
7171 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7172   SDValue N0 = N->getOperand(0);
7173   EVT VT = N->getValueType(0);
7174
7175   // fold (cttz c1) -> c2
7176   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7177     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7178
7179   // If the value is known never to be zero, switch to the undef version.
7180   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7181     if (DAG.isKnownNeverZero(N0))
7182       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7183   }
7184
7185   return SDValue();
7186 }
7187
7188 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7189   SDValue N0 = N->getOperand(0);
7190   EVT VT = N->getValueType(0);
7191
7192   // fold (cttz_zero_undef c1) -> c2
7193   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7194     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7195   return SDValue();
7196 }
7197
7198 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7199   SDValue N0 = N->getOperand(0);
7200   EVT VT = N->getValueType(0);
7201
7202   // fold (ctpop c1) -> c2
7203   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7204     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7205   return SDValue();
7206 }
7207
7208 // FIXME: This should be checking for no signed zeros on individual operands, as
7209 // well as no nans.
7210 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
7211   const TargetOptions &Options = DAG.getTarget().Options;
7212   EVT VT = LHS.getValueType();
7213
7214   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7215          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7216 }
7217
7218 /// Generate Min/Max node
7219 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7220                                    SDValue RHS, SDValue True, SDValue False,
7221                                    ISD::CondCode CC, const TargetLowering &TLI,
7222                                    SelectionDAG &DAG) {
7223   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7224     return SDValue();
7225
7226   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7227   switch (CC) {
7228   case ISD::SETOLT:
7229   case ISD::SETOLE:
7230   case ISD::SETLT:
7231   case ISD::SETLE:
7232   case ISD::SETULT:
7233   case ISD::SETULE: {
7234     // Since it's known never nan to get here already, either fminnum or
7235     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
7236     // expanded in terms of it.
7237     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7238     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7239       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7240
7241     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7242     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7243       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7244     return SDValue();
7245   }
7246   case ISD::SETOGT:
7247   case ISD::SETOGE:
7248   case ISD::SETGT:
7249   case ISD::SETGE:
7250   case ISD::SETUGT:
7251   case ISD::SETUGE: {
7252     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7253     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7254       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7255
7256     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7257     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7258       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7259     return SDValue();
7260   }
7261   default:
7262     return SDValue();
7263   }
7264 }
7265
7266 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7267   SDValue Cond = N->getOperand(0);
7268   SDValue N1 = N->getOperand(1);
7269   SDValue N2 = N->getOperand(2);
7270   EVT VT = N->getValueType(0);
7271   EVT CondVT = Cond.getValueType();
7272   SDLoc DL(N);
7273
7274   if (!VT.isInteger())
7275     return SDValue();
7276
7277   auto *C1 = dyn_cast<ConstantSDNode>(N1);
7278   auto *C2 = dyn_cast<ConstantSDNode>(N2);
7279   if (!C1 || !C2)
7280     return SDValue();
7281
7282   // Only do this before legalization to avoid conflicting with target-specific
7283   // transforms in the other direction (create a select from a zext/sext). There
7284   // is also a target-independent combine here in DAGCombiner in the other
7285   // direction for (select Cond, -1, 0) when the condition is not i1.
7286   if (CondVT == MVT::i1 && !LegalOperations) {
7287     if (C1->isNullValue() && C2->isOne()) {
7288       // select Cond, 0, 1 --> zext (!Cond)
7289       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7290       if (VT != MVT::i1)
7291         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7292       return NotCond;
7293     }
7294     if (C1->isNullValue() && C2->isAllOnesValue()) {
7295       // select Cond, 0, -1 --> sext (!Cond)
7296       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7297       if (VT != MVT::i1)
7298         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7299       return NotCond;
7300     }
7301     if (C1->isOne() && C2->isNullValue()) {
7302       // select Cond, 1, 0 --> zext (Cond)
7303       if (VT != MVT::i1)
7304         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7305       return Cond;
7306     }
7307     if (C1->isAllOnesValue() && C2->isNullValue()) {
7308       // select Cond, -1, 0 --> sext (Cond)
7309       if (VT != MVT::i1)
7310         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7311       return Cond;
7312     }
7313
7314     // For any constants that differ by 1, we can transform the select into an
7315     // extend and add. Use a target hook because some targets may prefer to
7316     // transform in the other direction.
7317     if (TLI.convertSelectOfConstantsToMath(VT)) {
7318       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7319         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7320         if (VT != MVT::i1)
7321           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7322         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7323       }
7324       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7325         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7326         if (VT != MVT::i1)
7327           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7328         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7329       }
7330     }
7331
7332     return SDValue();
7333   }
7334
7335   // fold (select Cond, 0, 1) -> (xor Cond, 1)
7336   // We can't do this reliably if integer based booleans have different contents
7337   // to floating point based booleans. This is because we can't tell whether we
7338   // have an integer-based boolean or a floating-point-based boolean unless we
7339   // can find the SETCC that produced it and inspect its operands. This is
7340   // fairly easy if C is the SETCC node, but it can potentially be
7341   // undiscoverable (or not reasonably discoverable). For example, it could be
7342   // in another basic block or it could require searching a complicated
7343   // expression.
7344   if (CondVT.isInteger() &&
7345       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7346           TargetLowering::ZeroOrOneBooleanContent &&
7347       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7348           TargetLowering::ZeroOrOneBooleanContent &&
7349       C1->isNullValue() && C2->isOne()) {
7350     SDValue NotCond =
7351         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7352     if (VT.bitsEq(CondVT))
7353       return NotCond;
7354     return DAG.getZExtOrTrunc(NotCond, DL, VT);
7355   }
7356
7357   return SDValue();
7358 }
7359
7360 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7361   SDValue N0 = N->getOperand(0);
7362   SDValue N1 = N->getOperand(1);
7363   SDValue N2 = N->getOperand(2);
7364   EVT VT = N->getValueType(0);
7365   EVT VT0 = N0.getValueType();
7366   SDLoc DL(N);
7367
7368   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7369     return V;
7370
7371   // fold (select X, X, Y) -> (or X, Y)
7372   // fold (select X, 1, Y) -> (or C, Y)
7373   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7374     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7375
7376   if (SDValue V = foldSelectOfConstants(N))
7377     return V;
7378
7379   // fold (select C, 0, X) -> (and (not C), X)
7380   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7381     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7382     AddToWorklist(NOTNode.getNode());
7383     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7384   }
7385   // fold (select C, X, 1) -> (or (not C), X)
7386   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7387     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7388     AddToWorklist(NOTNode.getNode());
7389     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7390   }
7391   // fold (select X, Y, X) -> (and X, Y)
7392   // fold (select X, Y, 0) -> (and X, Y)
7393   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7394     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7395
7396   // If we can fold this based on the true/false value, do so.
7397   if (SimplifySelectOps(N, N1, N2))
7398     return SDValue(N, 0); // Don't revisit N.
7399
7400   if (VT0 == MVT::i1) {
7401     // The code in this block deals with the following 2 equivalences:
7402     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7403     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7404     // The target can specify its preferred form with the
7405     // shouldNormalizeToSelectSequence() callback. However we always transform
7406     // to the right anyway if we find the inner select exists in the DAG anyway
7407     // and we always transform to the left side if we know that we can further
7408     // optimize the combination of the conditions.
7409     bool normalizeToSequence =
7410         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7411     // select (and Cond0, Cond1), X, Y
7412     //   -> select Cond0, (select Cond1, X, Y), Y
7413     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7414       SDValue Cond0 = N0->getOperand(0);
7415       SDValue Cond1 = N0->getOperand(1);
7416       SDValue InnerSelect =
7417           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7418       if (normalizeToSequence || !InnerSelect.use_empty())
7419         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7420                            InnerSelect, N2);
7421     }
7422     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7423     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7424       SDValue Cond0 = N0->getOperand(0);
7425       SDValue Cond1 = N0->getOperand(1);
7426       SDValue InnerSelect =
7427           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7428       if (normalizeToSequence || !InnerSelect.use_empty())
7429         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7430                            InnerSelect);
7431     }
7432
7433     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7434     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7435       SDValue N1_0 = N1->getOperand(0);
7436       SDValue N1_1 = N1->getOperand(1);
7437       SDValue N1_2 = N1->getOperand(2);
7438       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7439         // Create the actual and node if we can generate good code for it.
7440         if (!normalizeToSequence) {
7441           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7442           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7443         }
7444         // Otherwise see if we can optimize the "and" to a better pattern.
7445         if (SDValue Combined = visitANDLike(N0, N1_0, N))
7446           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7447                              N2);
7448       }
7449     }
7450     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7451     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7452       SDValue N2_0 = N2->getOperand(0);
7453       SDValue N2_1 = N2->getOperand(1);
7454       SDValue N2_2 = N2->getOperand(2);
7455       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7456         // Create the actual or node if we can generate good code for it.
7457         if (!normalizeToSequence) {
7458           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7459           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7460         }
7461         // Otherwise see if we can optimize to a better pattern.
7462         if (SDValue Combined = visitORLike(N0, N2_0, N))
7463           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7464                              N2_2);
7465       }
7466     }
7467   }
7468
7469   if (VT0 == MVT::i1) {
7470     // select (not Cond), N1, N2 -> select Cond, N2, N1
7471     if (isBitwiseNot(N0))
7472       return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
7473   }
7474
7475   // Fold selects based on a setcc into other things, such as min/max/abs.
7476   if (N0.getOpcode() == ISD::SETCC) {
7477     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7478     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7479
7480     // select (fcmp lt x, y), x, y -> fminnum x, y
7481     // select (fcmp gt x, y), x, y -> fmaxnum x, y
7482     //
7483     // This is OK if we don't care what happens if either operand is a NaN.
7484     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
7485       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7486                                                 CC, TLI, DAG))
7487         return FMinMax;
7488
7489     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7490     // This is conservatively limited to pre-legal-operations to give targets
7491     // a chance to reverse the transform if they want to do that. Also, it is
7492     // unlikely that the pattern would be formed late, so it's probably not
7493     // worth going through the other checks.
7494     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7495         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7496         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7497       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7498       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7499       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7500         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7501         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7502         //
7503         // The IR equivalent of this transform would have this form:
7504         //   %a = add %x, C
7505         //   %c = icmp ugt %x, ~C
7506         //   %r = select %c, -1, %a
7507         //   =>
7508         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7509         //   %u0 = extractvalue %u, 0
7510         //   %u1 = extractvalue %u, 1
7511         //   %r = select %u1, -1, %u0
7512         SDVTList VTs = DAG.getVTList(VT, VT0);
7513         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7514         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7515       }
7516     }
7517
7518     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7519         (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7520       return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7521                          N0.getOperand(2));
7522
7523     return SimplifySelect(DL, N0, N1, N2);
7524   }
7525
7526   return SDValue();
7527 }
7528
7529 static
7530 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7531   SDLoc DL(N);
7532   EVT LoVT, HiVT;
7533   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7534
7535   // Split the inputs.
7536   SDValue Lo, Hi, LL, LH, RL, RH;
7537   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7538   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7539
7540   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7541   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7542
7543   return std::make_pair(Lo, Hi);
7544 }
7545
7546 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7547 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7548 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7549   SDLoc DL(N);
7550   SDValue Cond = N->getOperand(0);
7551   SDValue LHS = N->getOperand(1);
7552   SDValue RHS = N->getOperand(2);
7553   EVT VT = N->getValueType(0);
7554   int NumElems = VT.getVectorNumElements();
7555   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7556          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7557          Cond.getOpcode() == ISD::BUILD_VECTOR);
7558
7559   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7560   // binary ones here.
7561   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7562     return SDValue();
7563
7564   // We're sure we have an even number of elements due to the
7565   // concat_vectors we have as arguments to vselect.
7566   // Skip BV elements until we find one that's not an UNDEF
7567   // After we find an UNDEF element, keep looping until we get to half the
7568   // length of the BV and see if all the non-undef nodes are the same.
7569   ConstantSDNode *BottomHalf = nullptr;
7570   for (int i = 0; i < NumElems / 2; ++i) {
7571     if (Cond->getOperand(i)->isUndef())
7572       continue;
7573
7574     if (BottomHalf == nullptr)
7575       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7576     else if (Cond->getOperand(i).getNode() != BottomHalf)
7577       return SDValue();
7578   }
7579
7580   // Do the same for the second half of the BuildVector
7581   ConstantSDNode *TopHalf = nullptr;
7582   for (int i = NumElems / 2; i < NumElems; ++i) {
7583     if (Cond->getOperand(i)->isUndef())
7584       continue;
7585
7586     if (TopHalf == nullptr)
7587       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7588     else if (Cond->getOperand(i).getNode() != TopHalf)
7589       return SDValue();
7590   }
7591
7592   assert(TopHalf && BottomHalf &&
7593          "One half of the selector was all UNDEFs and the other was all the "
7594          "same value. This should have been addressed before this function.");
7595   return DAG.getNode(
7596       ISD::CONCAT_VECTORS, DL, VT,
7597       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7598       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7599 }
7600
7601 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7602   if (Level >= AfterLegalizeTypes)
7603     return SDValue();
7604
7605   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7606   SDValue Mask = MSC->getMask();
7607   SDValue Data  = MSC->getValue();
7608   SDLoc DL(N);
7609
7610   // If the MSCATTER data type requires splitting and the mask is provided by a
7611   // SETCC, then split both nodes and its operands before legalization. This
7612   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7613   // and enables future optimizations (e.g. min/max pattern matching on X86).
7614   if (Mask.getOpcode() != ISD::SETCC)
7615     return SDValue();
7616
7617   // Check if any splitting is required.
7618   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7619       TargetLowering::TypeSplitVector)
7620     return SDValue();
7621   SDValue MaskLo, MaskHi;
7622   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7623
7624   EVT LoVT, HiVT;
7625   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7626
7627   SDValue Chain = MSC->getChain();
7628
7629   EVT MemoryVT = MSC->getMemoryVT();
7630   unsigned Alignment = MSC->getOriginalAlignment();
7631
7632   EVT LoMemVT, HiMemVT;
7633   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7634
7635   SDValue DataLo, DataHi;
7636   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7637
7638   SDValue Scale = MSC->getScale();
7639   SDValue BasePtr = MSC->getBasePtr();
7640   SDValue IndexLo, IndexHi;
7641   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7642
7643   MachineMemOperand *MMO = DAG.getMachineFunction().
7644     getMachineMemOperand(MSC->getPointerInfo(),
7645                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7646                           Alignment, MSC->getAAInfo(), MSC->getRanges());
7647
7648   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7649   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7650                                     DataLo.getValueType(), DL, OpsLo, MMO);
7651
7652   // The order of the Scatter operation after split is well defined. The "Hi"
7653   // part comes after the "Lo". So these two operations should be chained one
7654   // after another.
7655   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7656   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7657                               DL, OpsHi, MMO);
7658 }
7659
7660 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7661   if (Level >= AfterLegalizeTypes)
7662     return SDValue();
7663
7664   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7665   SDValue Mask = MST->getMask();
7666   SDValue Data  = MST->getValue();
7667   EVT VT = Data.getValueType();
7668   SDLoc DL(N);
7669
7670   // If the MSTORE data type requires splitting and the mask is provided by a
7671   // SETCC, then split both nodes and its operands before legalization. This
7672   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7673   // and enables future optimizations (e.g. min/max pattern matching on X86).
7674   if (Mask.getOpcode() == ISD::SETCC) {
7675     // Check if any splitting is required.
7676     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7677         TargetLowering::TypeSplitVector)
7678       return SDValue();
7679
7680     SDValue MaskLo, MaskHi, Lo, Hi;
7681     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7682
7683     SDValue Chain = MST->getChain();
7684     SDValue Ptr   = MST->getBasePtr();
7685
7686     EVT MemoryVT = MST->getMemoryVT();
7687     unsigned Alignment = MST->getOriginalAlignment();
7688
7689     // if Alignment is equal to the vector size,
7690     // take the half of it for the second part
7691     unsigned SecondHalfAlignment =
7692       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7693
7694     EVT LoMemVT, HiMemVT;
7695     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7696
7697     SDValue DataLo, DataHi;
7698     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7699
7700     MachineMemOperand *MMO = DAG.getMachineFunction().
7701       getMachineMemOperand(MST->getPointerInfo(),
7702                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7703                            Alignment, MST->getAAInfo(), MST->getRanges());
7704
7705     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7706                             MST->isTruncatingStore(),
7707                             MST->isCompressingStore());
7708
7709     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7710                                      MST->isCompressingStore());
7711     unsigned HiOffset = LoMemVT.getStoreSize();
7712
7713     MMO = DAG.getMachineFunction().getMachineMemOperand(
7714         MST->getPointerInfo().getWithOffset(HiOffset),
7715         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7716         MST->getAAInfo(), MST->getRanges());
7717
7718     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7719                             MST->isTruncatingStore(),
7720                             MST->isCompressingStore());
7721
7722     AddToWorklist(Lo.getNode());
7723     AddToWorklist(Hi.getNode());
7724
7725     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7726   }
7727   return SDValue();
7728 }
7729
7730 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7731   if (Level >= AfterLegalizeTypes)
7732     return SDValue();
7733
7734   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7735   SDValue Mask = MGT->getMask();
7736   SDLoc DL(N);
7737
7738   // If the MGATHER result requires splitting and the mask is provided by a
7739   // SETCC, then split both nodes and its operands before legalization. This
7740   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7741   // and enables future optimizations (e.g. min/max pattern matching on X86).
7742
7743   if (Mask.getOpcode() != ISD::SETCC)
7744     return SDValue();
7745
7746   EVT VT = N->getValueType(0);
7747
7748   // Check if any splitting is required.
7749   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7750       TargetLowering::TypeSplitVector)
7751     return SDValue();
7752
7753   SDValue MaskLo, MaskHi, Lo, Hi;
7754   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7755
7756   SDValue PassThru = MGT->getPassThru();
7757   SDValue PassThruLo, PassThruHi;
7758   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7759
7760   EVT LoVT, HiVT;
7761   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7762
7763   SDValue Chain = MGT->getChain();
7764   EVT MemoryVT = MGT->getMemoryVT();
7765   unsigned Alignment = MGT->getOriginalAlignment();
7766
7767   EVT LoMemVT, HiMemVT;
7768   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7769
7770   SDValue Scale = MGT->getScale();
7771   SDValue BasePtr = MGT->getBasePtr();
7772   SDValue Index = MGT->getIndex();
7773   SDValue IndexLo, IndexHi;
7774   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7775
7776   MachineMemOperand *MMO = DAG.getMachineFunction().
7777     getMachineMemOperand(MGT->getPointerInfo(),
7778                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7779                           Alignment, MGT->getAAInfo(), MGT->getRanges());
7780
7781   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
7782   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7783                            MMO);
7784
7785   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
7786   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7787                            MMO);
7788
7789   AddToWorklist(Lo.getNode());
7790   AddToWorklist(Hi.getNode());
7791
7792   // Build a factor node to remember that this load is independent of the
7793   // other one.
7794   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7795                       Hi.getValue(1));
7796
7797   // Legalized the chain result - switch anything that used the old chain to
7798   // use the new one.
7799   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7800
7801   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7802
7803   SDValue RetOps[] = { GatherRes, Chain };
7804   return DAG.getMergeValues(RetOps, DL);
7805 }
7806
7807 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7808   if (Level >= AfterLegalizeTypes)
7809     return SDValue();
7810
7811   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7812   SDValue Mask = MLD->getMask();
7813   SDLoc DL(N);
7814
7815   // If the MLOAD result requires splitting and the mask is provided by a
7816   // SETCC, then split both nodes and its operands before legalization. This
7817   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7818   // and enables future optimizations (e.g. min/max pattern matching on X86).
7819   if (Mask.getOpcode() == ISD::SETCC) {
7820     EVT VT = N->getValueType(0);
7821
7822     // Check if any splitting is required.
7823     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7824         TargetLowering::TypeSplitVector)
7825       return SDValue();
7826
7827     SDValue MaskLo, MaskHi, Lo, Hi;
7828     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7829
7830     SDValue PassThru = MLD->getPassThru();
7831     SDValue PassThruLo, PassThruHi;
7832     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7833
7834     EVT LoVT, HiVT;
7835     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7836
7837     SDValue Chain = MLD->getChain();
7838     SDValue Ptr   = MLD->getBasePtr();
7839     EVT MemoryVT = MLD->getMemoryVT();
7840     unsigned Alignment = MLD->getOriginalAlignment();
7841
7842     // if Alignment is equal to the vector size,
7843     // take the half of it for the second part
7844     unsigned SecondHalfAlignment =
7845       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7846          Alignment/2 : Alignment;
7847
7848     EVT LoMemVT, HiMemVT;
7849     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7850
7851     MachineMemOperand *MMO = DAG.getMachineFunction().
7852     getMachineMemOperand(MLD->getPointerInfo(),
7853                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7854                          Alignment, MLD->getAAInfo(), MLD->getRanges());
7855
7856     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7857                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7858
7859     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7860                                      MLD->isExpandingLoad());
7861     unsigned HiOffset = LoMemVT.getStoreSize();
7862
7863     MMO = DAG.getMachineFunction().getMachineMemOperand(
7864         MLD->getPointerInfo().getWithOffset(HiOffset),
7865         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7866         MLD->getAAInfo(), MLD->getRanges());
7867
7868     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7869                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7870
7871     AddToWorklist(Lo.getNode());
7872     AddToWorklist(Hi.getNode());
7873
7874     // Build a factor node to remember that this load is independent of the
7875     // other one.
7876     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7877                         Hi.getValue(1));
7878
7879     // Legalized the chain result - switch anything that used the old chain to
7880     // use the new one.
7881     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7882
7883     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7884
7885     SDValue RetOps[] = { LoadRes, Chain };
7886     return DAG.getMergeValues(RetOps, DL);
7887   }
7888   return SDValue();
7889 }
7890
7891 /// A vector select of 2 constant vectors can be simplified to math/logic to
7892 /// avoid a variable select instruction and possibly avoid constant loads.
7893 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7894   SDValue Cond = N->getOperand(0);
7895   SDValue N1 = N->getOperand(1);
7896   SDValue N2 = N->getOperand(2);
7897   EVT VT = N->getValueType(0);
7898   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7899       !TLI.convertSelectOfConstantsToMath(VT) ||
7900       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7901       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7902     return SDValue();
7903
7904   // Check if we can use the condition value to increment/decrement a single
7905   // constant value. This simplifies a select to an add and removes a constant
7906   // load/materialization from the general case.
7907   bool AllAddOne = true;
7908   bool AllSubOne = true;
7909   unsigned Elts = VT.getVectorNumElements();
7910   for (unsigned i = 0; i != Elts; ++i) {
7911     SDValue N1Elt = N1.getOperand(i);
7912     SDValue N2Elt = N2.getOperand(i);
7913     if (N1Elt.isUndef() || N2Elt.isUndef())
7914       continue;
7915
7916     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7917     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7918     if (C1 != C2 + 1)
7919       AllAddOne = false;
7920     if (C1 != C2 - 1)
7921       AllSubOne = false;
7922   }
7923
7924   // Further simplifications for the extra-special cases where the constants are
7925   // all 0 or all -1 should be implemented as folds of these patterns.
7926   SDLoc DL(N);
7927   if (AllAddOne || AllSubOne) {
7928     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7929     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7930     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7931     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7932     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7933   }
7934
7935   // The general case for select-of-constants:
7936   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7937   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7938   // leave that to a machine-specific pass.
7939   return SDValue();
7940 }
7941
7942 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7943   SDValue N0 = N->getOperand(0);
7944   SDValue N1 = N->getOperand(1);
7945   SDValue N2 = N->getOperand(2);
7946   SDLoc DL(N);
7947
7948   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7949     return V;
7950
7951   // Canonicalize integer abs.
7952   // vselect (setg[te] X,  0),  X, -X ->
7953   // vselect (setgt    X, -1),  X, -X ->
7954   // vselect (setl[te] X,  0), -X,  X ->
7955   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7956   if (N0.getOpcode() == ISD::SETCC) {
7957     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7958     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7959     bool isAbs = false;
7960     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7961
7962     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7963          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7964         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7965       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7966     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7967              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7968       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7969
7970     if (isAbs) {
7971       EVT VT = LHS.getValueType();
7972       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7973         return DAG.getNode(ISD::ABS, DL, VT, LHS);
7974
7975       SDValue Shift = DAG.getNode(
7976           ISD::SRA, DL, VT, LHS,
7977           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7978       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7979       AddToWorklist(Shift.getNode());
7980       AddToWorklist(Add.getNode());
7981       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7982     }
7983
7984     // vselect x, y (fcmp lt x, y) -> fminnum x, y
7985     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
7986     //
7987     // This is OK if we don't care about what happens if either operand is a
7988     // NaN.
7989     //
7990     EVT VT = N->getValueType(0);
7991     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
7992       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7993       if (SDValue FMinMax = combineMinNumMaxNum(
7994             DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
7995         return FMinMax;
7996     }
7997
7998     // If this select has a condition (setcc) with narrower operands than the
7999     // select, try to widen the compare to match the select width.
8000     // TODO: This should be extended to handle any constant.
8001     // TODO: This could be extended to handle non-loading patterns, but that
8002     //       requires thorough testing to avoid regressions.
8003     if (isNullOrNullSplat(RHS)) {
8004       EVT NarrowVT = LHS.getValueType();
8005       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8006       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8007       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8008       unsigned WideWidth = WideVT.getScalarSizeInBits();
8009       bool IsSigned = isSignedIntSetCC(CC);
8010       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8011       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8012           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8013           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8014           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8015         // Both compare operands can be widened for free. The LHS can use an
8016         // extended load, and the RHS is a constant:
8017         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8018         //   vselect (setcc extload(X), C'), N1, N2
8019         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8020         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8021         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8022         EVT WideSetCCVT = getSetCCResultType(WideVT);
8023         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8024         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8025       }
8026     }
8027   }
8028
8029   if (SimplifySelectOps(N, N1, N2))
8030     return SDValue(N, 0);  // Don't revisit N.
8031
8032   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8033   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8034     return N1;
8035   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8036   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8037     return N2;
8038
8039   // The ConvertSelectToConcatVector function is assuming both the above
8040   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8041   // and addressed.
8042   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8043       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8044       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8045     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8046       return CV;
8047   }
8048
8049   if (SDValue V = foldVSelectOfConstants(N))
8050     return V;
8051
8052   return SDValue();
8053 }
8054
8055 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8056   SDValue N0 = N->getOperand(0);
8057   SDValue N1 = N->getOperand(1);
8058   SDValue N2 = N->getOperand(2);
8059   SDValue N3 = N->getOperand(3);
8060   SDValue N4 = N->getOperand(4);
8061   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8062
8063   // fold select_cc lhs, rhs, x, x, cc -> x
8064   if (N2 == N3)
8065     return N2;
8066
8067   // Determine if the condition we're dealing with is constant
8068   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8069                                   CC, SDLoc(N), false)) {
8070     AddToWorklist(SCC.getNode());
8071
8072     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8073       if (!SCCC->isNullValue())
8074         return N2;    // cond always true -> true val
8075       else
8076         return N3;    // cond always false -> false val
8077     } else if (SCC->isUndef()) {
8078       // When the condition is UNDEF, just return the first operand. This is
8079       // coherent the DAG creation, no setcc node is created in this case
8080       return N2;
8081     } else if (SCC.getOpcode() == ISD::SETCC) {
8082       // Fold to a simpler select_cc
8083       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8084                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8085                          SCC.getOperand(2));
8086     }
8087   }
8088
8089   // If we can fold this based on the true/false value, do so.
8090   if (SimplifySelectOps(N, N2, N3))
8091     return SDValue(N, 0);  // Don't revisit N.
8092
8093   // fold select_cc into other things, such as min/max/abs
8094   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8095 }
8096
8097 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8098   // setcc is very commonly used as an argument to brcond. This pattern
8099   // also lend itself to numerous combines and, as a result, it is desired
8100   // we keep the argument to a brcond as a setcc as much as possible.
8101   bool PreferSetCC =
8102       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8103
8104   SDValue Combined = SimplifySetCC(
8105       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8106       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8107
8108   if (!Combined)
8109     return SDValue();
8110
8111   // If we prefer to have a setcc, and we don't, we'll try our best to
8112   // recreate one using rebuildSetCC.
8113   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8114     SDValue NewSetCC = rebuildSetCC(Combined);
8115
8116     // We don't have anything interesting to combine to.
8117     if (NewSetCC.getNode() == N)
8118       return SDValue();
8119
8120     if (NewSetCC)
8121       return NewSetCC;
8122   }
8123
8124   return Combined;
8125 }
8126
8127 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8128   SDValue LHS = N->getOperand(0);
8129   SDValue RHS = N->getOperand(1);
8130   SDValue Carry = N->getOperand(2);
8131   SDValue Cond = N->getOperand(3);
8132
8133   // If Carry is false, fold to a regular SETCC.
8134   if (isNullConstant(Carry))
8135     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8136
8137   return SDValue();
8138 }
8139
8140 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8141 /// a build_vector of constants.
8142 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8143 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8144 /// Vector extends are not folded if operations are legal; this is to
8145 /// avoid introducing illegal build_vector dag nodes.
8146 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8147                                          SelectionDAG &DAG, bool LegalTypes) {
8148   unsigned Opcode = N->getOpcode();
8149   SDValue N0 = N->getOperand(0);
8150   EVT VT = N->getValueType(0);
8151
8152   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8153          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8154          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8155          && "Expected EXTEND dag node in input!");
8156
8157   // fold (sext c1) -> c1
8158   // fold (zext c1) -> c1
8159   // fold (aext c1) -> c1
8160   if (isa<ConstantSDNode>(N0))
8161     return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8162
8163   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8164   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8165   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8166   EVT SVT = VT.getScalarType();
8167   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8168       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8169     return SDValue();
8170
8171   // We can fold this node into a build_vector.
8172   unsigned VTBits = SVT.getSizeInBits();
8173   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8174   SmallVector<SDValue, 8> Elts;
8175   unsigned NumElts = VT.getVectorNumElements();
8176   SDLoc DL(N);
8177
8178   // For zero-extensions, UNDEF elements still guarantee to have the upper
8179   // bits set to zero.
8180   bool IsZext =
8181       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8182
8183   for (unsigned i = 0; i != NumElts; ++i) {
8184     SDValue Op = N0.getOperand(i);
8185     if (Op.isUndef()) {
8186       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8187       continue;
8188     }
8189
8190     SDLoc DL(Op);
8191     // Get the constant value and if needed trunc it to the size of the type.
8192     // Nodes like build_vector might have constants wider than the scalar type.
8193     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8194     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8195       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8196     else
8197       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8198   }
8199
8200   return DAG.getBuildVector(VT, DL, Elts);
8201 }
8202
8203 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8204 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8205 // transformation. Returns true if extension are possible and the above
8206 // mentioned transformation is profitable.
8207 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8208                                     unsigned ExtOpc,
8209                                     SmallVectorImpl<SDNode *> &ExtendNodes,
8210                                     const TargetLowering &TLI) {
8211   bool HasCopyToRegUses = false;
8212   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8213   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8214                             UE = N0.getNode()->use_end();
8215        UI != UE; ++UI) {
8216     SDNode *User = *UI;
8217     if (User == N)
8218       continue;
8219     if (UI.getUse().getResNo() != N0.getResNo())
8220       continue;
8221     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8222     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8223       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8224       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8225         // Sign bits will be lost after a zext.
8226         return false;
8227       bool Add = false;
8228       for (unsigned i = 0; i != 2; ++i) {
8229         SDValue UseOp = User->getOperand(i);
8230         if (UseOp == N0)
8231           continue;
8232         if (!isa<ConstantSDNode>(UseOp))
8233           return false;
8234         Add = true;
8235       }
8236       if (Add)
8237         ExtendNodes.push_back(User);
8238       continue;
8239     }
8240     // If truncates aren't free and there are users we can't
8241     // extend, it isn't worthwhile.
8242     if (!isTruncFree)
8243       return false;
8244     // Remember if this value is live-out.
8245     if (User->getOpcode() == ISD::CopyToReg)
8246       HasCopyToRegUses = true;
8247   }
8248
8249   if (HasCopyToRegUses) {
8250     bool BothLiveOut = false;
8251     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8252          UI != UE; ++UI) {
8253       SDUse &Use = UI.getUse();
8254       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8255         BothLiveOut = true;
8256         break;
8257       }
8258     }
8259     if (BothLiveOut)
8260       // Both unextended and extended values are live out. There had better be
8261       // a good reason for the transformation.
8262       return ExtendNodes.size();
8263   }
8264   return true;
8265 }
8266
8267 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8268                                   SDValue OrigLoad, SDValue ExtLoad,
8269                                   ISD::NodeType ExtType) {
8270   // Extend SetCC uses if necessary.
8271   SDLoc DL(ExtLoad);
8272   for (SDNode *SetCC : SetCCs) {
8273     SmallVector<SDValue, 4> Ops;
8274
8275     for (unsigned j = 0; j != 2; ++j) {
8276       SDValue SOp = SetCC->getOperand(j);
8277       if (SOp == OrigLoad)
8278         Ops.push_back(ExtLoad);
8279       else
8280         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8281     }
8282
8283     Ops.push_back(SetCC->getOperand(2));
8284     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8285   }
8286 }
8287
8288 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8289 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8290   SDValue N0 = N->getOperand(0);
8291   EVT DstVT = N->getValueType(0);
8292   EVT SrcVT = N0.getValueType();
8293
8294   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8295           N->getOpcode() == ISD::ZERO_EXTEND) &&
8296          "Unexpected node type (not an extend)!");
8297
8298   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8299   // For example, on a target with legal v4i32, but illegal v8i32, turn:
8300   //   (v8i32 (sext (v8i16 (load x))))
8301   // into:
8302   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
8303   //                          (v4i32 (sextload (x + 16)))))
8304   // Where uses of the original load, i.e.:
8305   //   (v8i16 (load x))
8306   // are replaced with:
8307   //   (v8i16 (truncate
8308   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
8309   //                            (v4i32 (sextload (x + 16)))))))
8310   //
8311   // This combine is only applicable to illegal, but splittable, vectors.
8312   // All legal types, and illegal non-vector types, are handled elsewhere.
8313   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8314   //
8315   if (N0->getOpcode() != ISD::LOAD)
8316     return SDValue();
8317
8318   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8319
8320   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8321       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8322       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8323     return SDValue();
8324
8325   SmallVector<SDNode *, 4> SetCCs;
8326   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8327     return SDValue();
8328
8329   ISD::LoadExtType ExtType =
8330       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8331
8332   // Try to split the vector types to get down to legal types.
8333   EVT SplitSrcVT = SrcVT;
8334   EVT SplitDstVT = DstVT;
8335   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8336          SplitSrcVT.getVectorNumElements() > 1) {
8337     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8338     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8339   }
8340
8341   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8342     return SDValue();
8343
8344   SDLoc DL(N);
8345   const unsigned NumSplits =
8346       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8347   const unsigned Stride = SplitSrcVT.getStoreSize();
8348   SmallVector<SDValue, 4> Loads;
8349   SmallVector<SDValue, 4> Chains;
8350
8351   SDValue BasePtr = LN0->getBasePtr();
8352   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8353     const unsigned Offset = Idx * Stride;
8354     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8355
8356     SDValue SplitLoad = DAG.getExtLoad(
8357         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8358         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8359         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8360
8361     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8362                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8363
8364     Loads.push_back(SplitLoad.getValue(0));
8365     Chains.push_back(SplitLoad.getValue(1));
8366   }
8367
8368   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8369   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8370
8371   // Simplify TF.
8372   AddToWorklist(NewChain.getNode());
8373
8374   CombineTo(N, NewValue);
8375
8376   // Replace uses of the original load (before extension)
8377   // with a truncate of the concatenated sextloaded vectors.
8378   SDValue Trunc =
8379       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8380   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8381   CombineTo(N0.getNode(), Trunc, NewChain);
8382   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8383 }
8384
8385 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8386 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8387 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8388   assert(N->getOpcode() == ISD::ZERO_EXTEND);
8389   EVT VT = N->getValueType(0);
8390
8391   // and/or/xor
8392   SDValue N0 = N->getOperand(0);
8393   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8394         N0.getOpcode() == ISD::XOR) ||
8395       N0.getOperand(1).getOpcode() != ISD::Constant ||
8396       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8397     return SDValue();
8398
8399   // shl/shr
8400   SDValue N1 = N0->getOperand(0);
8401   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8402       N1.getOperand(1).getOpcode() != ISD::Constant ||
8403       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8404     return SDValue();
8405
8406   // load
8407   if (!isa<LoadSDNode>(N1.getOperand(0)))
8408     return SDValue();
8409   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8410   EVT MemVT = Load->getMemoryVT();
8411   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8412       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8413     return SDValue();
8414
8415
8416   // If the shift op is SHL, the logic op must be AND, otherwise the result
8417   // will be wrong.
8418   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8419     return SDValue();
8420
8421   if (!N0.hasOneUse() || !N1.hasOneUse())
8422     return SDValue();
8423
8424   SmallVector<SDNode*, 4> SetCCs;
8425   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8426                                ISD::ZERO_EXTEND, SetCCs, TLI))
8427     return SDValue();
8428
8429   // Actually do the transformation.
8430   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8431                                    Load->getChain(), Load->getBasePtr(),
8432                                    Load->getMemoryVT(), Load->getMemOperand());
8433
8434   SDLoc DL1(N1);
8435   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8436                               N1.getOperand(1));
8437
8438   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8439   Mask = Mask.zext(VT.getSizeInBits());
8440   SDLoc DL0(N0);
8441   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8442                             DAG.getConstant(Mask, DL0, VT));
8443
8444   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8445   CombineTo(N, And);
8446   if (SDValue(Load, 0).hasOneUse()) {
8447     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8448   } else {
8449     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8450                                 Load->getValueType(0), ExtLoad);
8451     CombineTo(Load, Trunc, ExtLoad.getValue(1));
8452   }
8453   return SDValue(N,0); // Return N so it doesn't get rechecked!
8454 }
8455
8456 /// If we're narrowing or widening the result of a vector select and the final
8457 /// size is the same size as a setcc (compare) feeding the select, then try to
8458 /// apply the cast operation to the select's operands because matching vector
8459 /// sizes for a select condition and other operands should be more efficient.
8460 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8461   unsigned CastOpcode = Cast->getOpcode();
8462   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8463           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8464           CastOpcode == ISD::FP_ROUND) &&
8465          "Unexpected opcode for vector select narrowing/widening");
8466
8467   // We only do this transform before legal ops because the pattern may be
8468   // obfuscated by target-specific operations after legalization. Do not create
8469   // an illegal select op, however, because that may be difficult to lower.
8470   EVT VT = Cast->getValueType(0);
8471   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8472     return SDValue();
8473
8474   SDValue VSel = Cast->getOperand(0);
8475   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8476       VSel.getOperand(0).getOpcode() != ISD::SETCC)
8477     return SDValue();
8478
8479   // Does the setcc have the same vector size as the casted select?
8480   SDValue SetCC = VSel.getOperand(0);
8481   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8482   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8483     return SDValue();
8484
8485   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8486   SDValue A = VSel.getOperand(1);
8487   SDValue B = VSel.getOperand(2);
8488   SDValue CastA, CastB;
8489   SDLoc DL(Cast);
8490   if (CastOpcode == ISD::FP_ROUND) {
8491     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8492     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8493     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8494   } else {
8495     CastA = DAG.getNode(CastOpcode, DL, VT, A);
8496     CastB = DAG.getNode(CastOpcode, DL, VT, B);
8497   }
8498   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8499 }
8500
8501 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8502 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8503 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8504                                      const TargetLowering &TLI, EVT VT,
8505                                      bool LegalOperations, SDNode *N,
8506                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8507   SDNode *N0Node = N0.getNode();
8508   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8509                                                    : ISD::isZEXTLoad(N0Node);
8510   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8511       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8512     return {};
8513
8514   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8515   EVT MemVT = LN0->getMemoryVT();
8516   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8517       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8518     return {};
8519
8520   SDValue ExtLoad =
8521       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8522                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8523   Combiner.CombineTo(N, ExtLoad);
8524   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8525   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8526 }
8527
8528 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8529 // Only generate vector extloads when 1) they're legal, and 2) they are
8530 // deemed desirable by the target.
8531 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8532                                   const TargetLowering &TLI, EVT VT,
8533                                   bool LegalOperations, SDNode *N, SDValue N0,
8534                                   ISD::LoadExtType ExtLoadType,
8535                                   ISD::NodeType ExtOpc) {
8536   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8537       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8538       ((LegalOperations || VT.isVector() ||
8539         cast<LoadSDNode>(N0)->isVolatile()) &&
8540        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8541     return {};
8542
8543   bool DoXform = true;
8544   SmallVector<SDNode *, 4> SetCCs;
8545   if (!N0.hasOneUse())
8546     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8547   if (VT.isVector())
8548     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8549   if (!DoXform)
8550     return {};
8551
8552   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8553   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8554                                    LN0->getBasePtr(), N0.getValueType(),
8555                                    LN0->getMemOperand());
8556   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8557   // If the load value is used only by N, replace it via CombineTo N.
8558   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8559   Combiner.CombineTo(N, ExtLoad);
8560   if (NoReplaceTrunc) {
8561     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8562   } else {
8563     SDValue Trunc =
8564         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8565     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8566   }
8567   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8568 }
8569
8570 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8571                                        bool LegalOperations) {
8572   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8573           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8574
8575   SDValue SetCC = N->getOperand(0);
8576   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8577       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8578     return SDValue();
8579
8580   SDValue X = SetCC.getOperand(0);
8581   SDValue Ones = SetCC.getOperand(1);
8582   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8583   EVT VT = N->getValueType(0);
8584   EVT XVT = X.getValueType();
8585   // setge X, C is canonicalized to setgt, so we do not need to match that
8586   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8587   // not require the 'not' op.
8588   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8589     // Invert and smear/shift the sign bit:
8590     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8591     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8592     SDLoc DL(N);
8593     SDValue NotX = DAG.getNOT(DL, X, VT);
8594     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8595     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8596     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8597   }
8598   return SDValue();
8599 }
8600
8601 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8602   SDValue N0 = N->getOperand(0);
8603   EVT VT = N->getValueType(0);
8604   SDLoc DL(N);
8605
8606   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8607     return Res;
8608
8609   // fold (sext (sext x)) -> (sext x)
8610   // fold (sext (aext x)) -> (sext x)
8611   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8612     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8613
8614   if (N0.getOpcode() == ISD::TRUNCATE) {
8615     // fold (sext (truncate (load x))) -> (sext (smaller load x))
8616     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8617     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8618       SDNode *oye = N0.getOperand(0).getNode();
8619       if (NarrowLoad.getNode() != N0.getNode()) {
8620         CombineTo(N0.getNode(), NarrowLoad);
8621         // CombineTo deleted the truncate, if needed, but not what's under it.
8622         AddToWorklist(oye);
8623       }
8624       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8625     }
8626
8627     // See if the value being truncated is already sign extended.  If so, just
8628     // eliminate the trunc/sext pair.
8629     SDValue Op = N0.getOperand(0);
8630     unsigned OpBits   = Op.getScalarValueSizeInBits();
8631     unsigned MidBits  = N0.getScalarValueSizeInBits();
8632     unsigned DestBits = VT.getScalarSizeInBits();
8633     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8634
8635     if (OpBits == DestBits) {
8636       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
8637       // bits, it is already ready.
8638       if (NumSignBits > DestBits-MidBits)
8639         return Op;
8640     } else if (OpBits < DestBits) {
8641       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
8642       // bits, just sext from i32.
8643       if (NumSignBits > OpBits-MidBits)
8644         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8645     } else {
8646       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
8647       // bits, just truncate to i32.
8648       if (NumSignBits > OpBits-MidBits)
8649         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8650     }
8651
8652     // fold (sext (truncate x)) -> (sextinreg x).
8653     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8654                                                  N0.getValueType())) {
8655       if (OpBits < DestBits)
8656         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8657       else if (OpBits > DestBits)
8658         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8659       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8660                          DAG.getValueType(N0.getValueType()));
8661     }
8662   }
8663
8664   // Try to simplify (sext (load x)).
8665   if (SDValue foldedExt =
8666           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8667                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8668     return foldedExt;
8669
8670   // fold (sext (load x)) to multiple smaller sextloads.
8671   // Only on illegal but splittable vectors.
8672   if (SDValue ExtLoad = CombineExtLoad(N))
8673     return ExtLoad;
8674
8675   // Try to simplify (sext (sextload x)).
8676   if (SDValue foldedExt = tryToFoldExtOfExtload(
8677           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8678     return foldedExt;
8679
8680   // fold (sext (and/or/xor (load x), cst)) ->
8681   //      (and/or/xor (sextload x), (sext cst))
8682   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8683        N0.getOpcode() == ISD::XOR) &&
8684       isa<LoadSDNode>(N0.getOperand(0)) &&
8685       N0.getOperand(1).getOpcode() == ISD::Constant &&
8686       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8687     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8688     EVT MemVT = LN00->getMemoryVT();
8689     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8690       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8691       SmallVector<SDNode*, 4> SetCCs;
8692       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8693                                              ISD::SIGN_EXTEND, SetCCs, TLI);
8694       if (DoXform) {
8695         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8696                                          LN00->getChain(), LN00->getBasePtr(),
8697                                          LN00->getMemoryVT(),
8698                                          LN00->getMemOperand());
8699         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8700         Mask = Mask.sext(VT.getSizeInBits());
8701         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8702                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8703         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8704         bool NoReplaceTruncAnd = !N0.hasOneUse();
8705         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8706         CombineTo(N, And);
8707         // If N0 has multiple uses, change other uses as well.
8708         if (NoReplaceTruncAnd) {
8709           SDValue TruncAnd =
8710               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8711           CombineTo(N0.getNode(), TruncAnd);
8712         }
8713         if (NoReplaceTrunc) {
8714           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8715         } else {
8716           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8717                                       LN00->getValueType(0), ExtLoad);
8718           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8719         }
8720         return SDValue(N,0); // Return N so it doesn't get rechecked!
8721       }
8722     }
8723   }
8724
8725   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8726     return V;
8727
8728   if (N0.getOpcode() == ISD::SETCC) {
8729     SDValue N00 = N0.getOperand(0);
8730     SDValue N01 = N0.getOperand(1);
8731     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8732     EVT N00VT = N0.getOperand(0).getValueType();
8733
8734     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8735     // Only do this before legalize for now.
8736     if (VT.isVector() && !LegalOperations &&
8737         TLI.getBooleanContents(N00VT) ==
8738             TargetLowering::ZeroOrNegativeOneBooleanContent) {
8739       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8740       // of the same size as the compared operands. Only optimize sext(setcc())
8741       // if this is the case.
8742       EVT SVT = getSetCCResultType(N00VT);
8743
8744       // If we already have the desired type, don't change it.
8745       if (SVT != N0.getValueType()) {
8746         // We know that the # elements of the results is the same as the
8747         // # elements of the compare (and the # elements of the compare result
8748         // for that matter).  Check to see that they are the same size.  If so,
8749         // we know that the element size of the sext'd result matches the
8750         // element size of the compare operands.
8751         if (VT.getSizeInBits() == SVT.getSizeInBits())
8752           return DAG.getSetCC(DL, VT, N00, N01, CC);
8753
8754         // If the desired elements are smaller or larger than the source
8755         // elements, we can use a matching integer vector type and then
8756         // truncate/sign extend.
8757         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8758         if (SVT == MatchingVecType) {
8759           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8760           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8761         }
8762       }
8763     }
8764
8765     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8766     // Here, T can be 1 or -1, depending on the type of the setcc and
8767     // getBooleanContents().
8768     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8769
8770     // To determine the "true" side of the select, we need to know the high bit
8771     // of the value returned by the setcc if it evaluates to true.
8772     // If the type of the setcc is i1, then the true case of the select is just
8773     // sext(i1 1), that is, -1.
8774     // If the type of the setcc is larger (say, i8) then the value of the high
8775     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8776     // of the appropriate width.
8777     SDValue ExtTrueVal = (SetCCWidth == 1)
8778                              ? DAG.getAllOnesConstant(DL, VT)
8779                              : DAG.getBoolConstant(true, DL, VT, N00VT);
8780     SDValue Zero = DAG.getConstant(0, DL, VT);
8781     if (SDValue SCC =
8782             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8783       return SCC;
8784
8785     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8786       EVT SetCCVT = getSetCCResultType(N00VT);
8787       // Don't do this transform for i1 because there's a select transform
8788       // that would reverse it.
8789       // TODO: We should not do this transform at all without a target hook
8790       // because a sext is likely cheaper than a select?
8791       if (SetCCVT.getScalarSizeInBits() != 1 &&
8792           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8793         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8794         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8795       }
8796     }
8797   }
8798
8799   // fold (sext x) -> (zext x) if the sign bit is known zero.
8800   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8801       DAG.SignBitIsZero(N0))
8802     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8803
8804   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8805     return NewVSel;
8806
8807   return SDValue();
8808 }
8809
8810 // isTruncateOf - If N is a truncate of some other value, return true, record
8811 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8812 // This function computes KnownBits to avoid a duplicated call to
8813 // computeKnownBits in the caller.
8814 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8815                          KnownBits &Known) {
8816   if (N->getOpcode() == ISD::TRUNCATE) {
8817     Op = N->getOperand(0);
8818     Known = DAG.computeKnownBits(Op);
8819     return true;
8820   }
8821
8822   if (N.getOpcode() != ISD::SETCC ||
8823       N.getValueType().getScalarType() != MVT::i1 ||
8824       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
8825     return false;
8826
8827   SDValue Op0 = N->getOperand(0);
8828   SDValue Op1 = N->getOperand(1);
8829   assert(Op0.getValueType() == Op1.getValueType());
8830
8831   if (isNullOrNullSplat(Op0))
8832     Op = Op1;
8833   else if (isNullOrNullSplat(Op1))
8834     Op = Op0;
8835   else
8836     return false;
8837
8838   Known = DAG.computeKnownBits(Op);
8839
8840   return (Known.Zero | 1).isAllOnesValue();
8841 }
8842
/// Combine a ZERO_EXTEND node \p N. The folds below are tried in order and the
/// first one that applies supplies the return value. SDValue(N, 0) is returned
/// on paths that have already rewritten nodes through CombineTo, and an empty
/// SDValue is returned when no fold applied.
8843 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8844   SDValue N0 = N->getOperand(0);
8845   EVT VT = N->getValueType(0);
8846
8847   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8848     return Res;
8849
8850   // fold (zext (zext x)) -> (zext x)
8851   // fold (zext (aext x)) -> (zext x)
8852   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8853     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8854                        N0.getOperand(0));
8855
8856   // fold (zext (truncate x)) -> (zext x) or
8857   //      (zext (truncate x)) -> (truncate x)
8858   // This is valid when the truncated bits of x are already zero.
       // isTruncateOf also accepts an i1 (setcc x, 0, setne) in place of a real
       // truncate; in that case Op and N0 may have the same scalar width.
8859   SDValue Op;
8860   KnownBits Known;
8861   if (isTruncateOf(DAG, N0, Op, Known)) {
         // TruncatedBits is empty when Op and N0 have the same scalar width.
         // Otherwise it covers the bits of Op from N0's width up to the smaller
         // of Op's width and VT's width.
8862     APInt TruncatedBits =
8863       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
8864       APInt(Op.getScalarValueSizeInBits(), 0) :
8865       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
8866                         N0.getScalarValueSizeInBits(),
8867                         std::min(Op.getScalarValueSizeInBits(),
8868                                  VT.getScalarSizeInBits()));
8869     if (TruncatedBits.isSubsetOf(Known.Zero))
8870       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8871   }
8872
8873   // fold (zext (truncate x)) -> (and x, mask)
8874   if (N0.getOpcode() == ISD::TRUNCATE) {
8875     // fold (zext (truncate (load x))) -> (zext (smaller load x))
8876     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
8877     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // oye is the node feeding the truncate; it is put back on the
           // worklist once the truncate has been replaced.
8878       SDNode *oye = N0.getOperand(0).getNode();
8879       if (NarrowLoad.getNode() != N0.getNode()) {
8880         CombineTo(N0.getNode(), NarrowLoad);
8881         // CombineTo deleted the truncate, if needed, but not what's under it.
8882         AddToWorklist(oye);
8883       }
8884       return SDValue(N, 0); // Return N so it doesn't get rechecked!
8885     }
8886
         // SrcVT is the type of the value being truncated; MinVT is the
         // truncated type, whose scalar type defines the mask used below.
8887     EVT SrcVT = N0.getOperand(0).getValueType();
8888     EVT MinVT = N0.getValueType();
8889
8890     // Try to mask before the extension to avoid having to generate a larger mask,
8891     // possibly over several sub-vectors.
8892     if (SrcVT.bitsLT(VT) && VT.isVector()) {
8893       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
8894                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
             // Note: this Op shadows the Op declared before isTruncateOf above.
8895         SDValue Op = N0.getOperand(0);
8896         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8897         AddToWorklist(Op.getNode());
8898         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8899         // Transfer the debug info; the new node is equivalent to N0.
8900         DAG.transferDbgValues(N0, ZExtOrTrunc);
8901         return ZExtOrTrunc;
8902       }
8903     }
8904
         // General case: bring x to VT with an any-extend or truncate, then
         // zero everything above MinVT's scalar width.
8905     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
8906       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8907       AddToWorklist(Op.getNode());
8908       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8909       // We may safely transfer the debug info describing the truncate node over
8910       // to the equivalent and operation.
8911       DAG.transferDbgValues(N0, And);
8912       return And;
8913     }
8914   }
8915
8916   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
8917   // if either of the casts is not free.
8918   if (N0.getOpcode() == ISD::AND &&
8919       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8920       N0.getOperand(1).getOpcode() == ISD::Constant &&
8921       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8922                            N0.getValueType()) ||
8923        !TLI.isZExtFree(N0.getValueType(), VT))) {
8924     SDValue X = N0.getOperand(0).getOperand(0);
8925     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
         // Zero-extend the mask constant to VT's width so the high bits of the
         // any-extended x are cleared by the new AND.
8926     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8927     Mask = Mask.zext(VT.getSizeInBits());
8928     SDLoc DL(N);
8929     return DAG.getNode(ISD::AND, DL, VT,
8930                        X, DAG.getConstant(Mask, DL, VT));
8931   }
8932
8933   // Try to simplify (zext (load x)).
8934   if (SDValue foldedExt =
8935           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8936                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
8937     return foldedExt;
8938
8939   // fold (zext (load x)) to multiple smaller zextloads.
8940   // Only on illegal but splittable vectors.
8941   if (SDValue ExtLoad = CombineExtLoad(N))
8942     return ExtLoad;
8943
8944   // fold (zext (and/or/xor (load x), cst)) ->
8945   //      (and/or/xor (zextload x), (zext cst))
8946   // Unless (and (load x) cst) will match as a zextload already and has
8947   // additional users.
       // The last clause of the condition restricts this fold to the phase
       // before operation legalization, and to logic ops that are legal in VT.
8948   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8949        N0.getOpcode() == ISD::XOR) &&
8950       isa<LoadSDNode>(N0.getOperand(0)) &&
8951       N0.getOperand(1).getOpcode() == ISD::Constant &&
8952       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8953     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8954     EVT MemVT = LN00->getMemoryVT();
         // Requires a legal zextload of MemVT into VT; sign-extending loads and
         // indexed loads are excluded.
8955     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
8956         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
8957       bool DoXform = true;
8958       SmallVector<SDNode*, 4> SetCCs;
           // With several users of the AND, skip the transform when
           // isAndLoadExtLoad reports that (and (load x), cst) already forms an
           // extending load.
8959       if (!N0.hasOneUse()) {
8960         if (N0.getOpcode() == ISD::AND) {
8961           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
8962           EVT LoadResultTy = AndC->getValueType(0);
8963           EVT ExtVT;
8964           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
8965             DoXform = false;
8966         }
8967       }
8968       if (DoXform)
8969         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8970                                           ISD::ZERO_EXTEND, SetCCs, TLI);
8971       if (DoXform) {
8972         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
8973                                          LN00->getChain(), LN00->getBasePtr(),
8974                                          LN00->getMemoryVT(),
8975                                          LN00->getMemOperand());
8976         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8977         Mask = Mask.zext(VT.getSizeInBits());
8978         SDLoc DL(N);
             // Despite its name, And holds whichever of and/or/xor N0 was.
8979         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8980                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8981         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
             // Both use counts are sampled here, before the CombineTo calls
             // below. NoReplaceTruncAnd is true when N0 has users besides N;
             // those users are redirected to a truncate of the new node.
             // NoReplaceTrunc is true when the load's value has a single use,
             // so only its chain result needs to be redirected.
8982         bool NoReplaceTruncAnd = !N0.hasOneUse();
8983         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8984         CombineTo(N, And);
8985         // If N0 has multiple uses, change other uses as well.
8986         if (NoReplaceTruncAnd) {
8987           SDValue TruncAnd =
8988               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8989           CombineTo(N0.getNode(), TruncAnd);
8990         }
8991         if (NoReplaceTrunc) {
8992           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8993         } else {
               // The load value has other users: give them a truncate of the
               // extending load and move the chain over as well.
8994           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8995                                       LN00->getValueType(0), ExtLoad);
8996           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8997         }
8998         return SDValue(N,0); // Return N so it doesn't get rechecked!
8999       }
9000     }
9001   }
9002
9003   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9004   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9005   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9006     return ZExtLoad;
9007
9008   // Try to simplify (zext (zextload x)).
9009   if (SDValue foldedExt = tryToFoldExtOfExtload(
9010           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9011     return foldedExt;
9012
9013   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9014     return V;
9015
9016   if (N0.getOpcode() == ISD::SETCC) {
9017     // Only do this before legalize for now.
9018     if (!LegalOperations && VT.isVector() &&
9019         N0.getValueType().getVectorElementType() == MVT::i1) {
9020       EVT N00VT = N0.getOperand(0).getValueType();
           // If the setcc already has the result type that getSetCCResultType
           // reports for its operand type, give up on N altogether: this
           // returns from visitZERO_EXTEND, so no later fold is tried either.
9021       if (getSetCCResultType(N00VT) == N0.getValueType())
9022         return SDValue();
9023
9024       // We know that the # elements of the results is the same as the #
9025       // elements of the compare (and the # elements of the compare result for
9026       // that matter). Check to see that they are the same size. If so, we know
9027       // that the element size of the zext'd result matches the element size of
9028       // the compare operands.
9029       SDLoc DL(N);
9030       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9031       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9032         // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
9033         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9034                                      N0.getOperand(1), N0.getOperand(2));
9035         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9036       }
9037
9038       // If the desired elements are smaller or larger than the source
9039       // elements we can use a matching integer vector type and then
9040       // truncate/sign extend; the AND with 1 then leaves only the low bit.
9041       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9042       SDValue VsetCC =
9043           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9044                       N0.getOperand(1), N0.getOperand(2));
9045       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9046                          VecOnes);
9047     }
9048
9049     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9050     SDLoc DL(N);
9051     if (SDValue SCC = SimplifySelectCC(
9052             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9053             DAG.getConstant(0, DL, VT),
9054             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9055       return SCC;
9056   }
9057
9058   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
       // Only when the shift amount is constant and the shift has one use.
9059   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9060       isa<ConstantSDNode>(N0.getOperand(1)) &&
9061       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9062       N0.hasOneUse()) {
9063     SDValue ShAmt = N0.getOperand(1);
9064     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9065     if (N0.getOpcode() == ISD::SHL) {
9066       SDValue InnerZExt = N0.getOperand(0);
9067       // If the original shl may be shifting out bits, do not perform this
9068       // transformation.
           // KnownZeroBits is the number of high bits added by the inner zext.
           // A larger shift amount abandons all remaining folds for N.
9069       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9070         InnerZExt.getOperand(0).getValueSizeInBits();
9071       if (ShAmtVal > KnownZeroBits)
9072         return SDValue();
9073     }
9074
9075     SDLoc DL(N);
9076
9077     // Ensure that the shift amount is wide enough for the shifted value.
         // Only done when VT is at least 256 bits wide; the amount is then
         // zero-extended to i32.
9078     if (VT.getSizeInBits() >= 256)
9079       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9080
9081     return DAG.getNode(N0.getOpcode(), DL, VT,
9082                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9083                        ShAmt);
9084   }
9085
9086   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9087     return NewVSel;
9088
9089   return SDValue();
9090 }
9091
9092 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9093   SDValue N0 = N->getOperand(0);
9094   EVT VT = N->getValueType(0);
9095
9096   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9097     return Res;
9098
9099   // fold (aext (aext x)) -> (aext x)
9100   // fold (aext (zext x)) -> (zext x)
9101   // fold (aext (sext x)) -> (sext x)
9102   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
9103       N0.getOpcode() == ISD::ZERO_EXTEND ||
9104       N0.getOpcode() == ISD::SIGN_EXTEND)
9105     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9106
9107   // fold (aext (truncate (load x))) -> (aext (smaller load x))
9108   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9109   if (N0.getOpcode() == ISD::TRUNCATE) {
9110     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9111       SDNode *oye = N0.getOperand(0).getNode();
9112       if (NarrowLoad.getNode() != N0.getNode()) {
9113         CombineTo(N0.getNode(), NarrowLoad);
9114         // CombineTo deleted the truncate, if needed, but not what's under it.
9115         AddToWorklist(oye);
9116       }
9117       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9118     }
9119   }
9120
9121   // fold (aext (truncate x))
9122   if (N0.getOpcode() == ISD::TRUNCATE)
9123     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9124
9125   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9126   // if the trunc is not free.
9127   if (N0.getOpcode() == ISD::AND &&
9128       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9129       N0.getOperand(1).getOpcode() == ISD::Constant &&
9130       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9131                           N0.getValueType())) {
9132     SDLoc DL(N);
9133     SDValue X = N0.getOperand(0).getOperand(0);
9134     X = DAG.getAnyExtOrTrunc(X, DL, VT);
9135     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9136     Mask = Mask.zext(VT.getSizeInBits());
9137     return DAG.getNode(ISD::AND, DL, VT,
9138                        X, DAG.getConstant(Mask, DL, VT));
9139   }
9140
9141   // fold (aext (load x)) -> (aext (truncate (extload x)))
9142   // None of the supported targets knows how to perform load and any_ext
9143   // on vectors in one instruction.  We only perform this transformation on
9144   // scalars.
9145   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9146       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9147       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9148     bool DoXform = true;
9149     SmallVector<SDNode*, 4> SetCCs;
9150     if (!N0.hasOneUse())
9151       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9152                                         TLI);
9153     if (DoXform) {
9154       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9155       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9156                                        LN0->getChain(),
9157                                        LN0->getBasePtr(), N0.getValueType(),
9158                                        LN0->getMemOperand());
9159       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9160       // If the load value is used only by N, replace it via CombineTo N.
9161       bool NoReplaceTrunc = N0.hasOneUse();
9162       CombineTo(N, ExtLoad);
9163       if (NoReplaceTrunc) {
9164         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9165       } else {
9166         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9167                                     N0.getValueType(), ExtLoad);
9168         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9169       }
9170       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9171     }
9172   }
9173
9174   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9175   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9176   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
9177   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9178       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9179     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9180     ISD::LoadExtType ExtType = LN0->getExtensionType();
9181     EVT MemVT = LN0->getMemoryVT();
9182     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9183       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9184                                        VT, LN0->getChain(), LN0->getBasePtr(),
9185                                        MemVT, LN0->getMemOperand());
9186       CombineTo(N, ExtLoad);
9187       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9188       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9189     }
9190   }
9191
9192   if (N0.getOpcode() == ISD::SETCC) {
9193     // For vectors:
9194     // aext(setcc) -> vsetcc
9195     // aext(setcc) -> truncate(vsetcc)
9196     // aext(setcc) -> aext(vsetcc)
9197     // Only do this before legalize for now.
9198     if (VT.isVector() && !LegalOperations) {
9199       EVT N00VT = N0.getOperand(0).getValueType();
9200       if (getSetCCResultType(N00VT) == N0.getValueType())
9201         return SDValue();
9202
9203       // We know that the # elements of the results is the same as the
9204       // # elements of the compare (and the # elements of the compare result
9205       // for that matter).  Check to see that they are the same size.  If so,
9206       // we know that the element size of the sext'd result matches the
9207       // element size of the compare operands.
9208       if (VT.getSizeInBits() == N00VT.getSizeInBits())
9209         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9210                              N0.getOperand(1),
9211                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
9212
9213       // If the desired elements are smaller or larger than the source
9214       // elements we can use a matching integer vector type and then
9215       // truncate/any extend
9216       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9217       SDValue VsetCC =
9218         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9219                       N0.getOperand(1),
9220                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
9221       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9222     }
9223
9224     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9225     SDLoc DL(N);
9226     if (SDValue SCC = SimplifySelectCC(
9227             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9228             DAG.getConstant(0, DL, VT),
9229             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9230       return SCC;
9231   }
9232
9233   return SDValue();
9234 }
9235
9236 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9237   unsigned Opcode = N->getOpcode();
9238   SDValue N0 = N->getOperand(0);
9239   SDValue N1 = N->getOperand(1);
9240   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9241
9242   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9243   if (N0.getOpcode() == Opcode &&
9244       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9245     return N0;
9246
9247   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9248       N0.getOperand(0).getOpcode() == Opcode) {
9249     // We have an assert, truncate, assert sandwich. Make one stronger assert
9250     // by asserting on the smallest asserted type to the larger source type.
9251     // This eliminates the later assert:
9252     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9253     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9254     SDValue BigA = N0.getOperand(0);
9255     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9256     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9257            "Asserting zero/sign-extended bits to a type larger than the "
9258            "truncated destination does not provide information");
9259
9260     SDLoc DL(N);
9261     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9262     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9263     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9264                                     BigA.getOperand(0), MinAssertVTVal);
9265     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9266   }
9267
9268   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
9269   // than X. Just move the AssertZext in front of the truncate and drop the
9270   // AssertSExt.
9271   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9272       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9273       Opcode == ISD::AssertZext) {
9274     SDValue BigA = N0.getOperand(0);
9275     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9276     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9277            "Asserting zero/sign-extended bits to a type larger than the "
9278            "truncated destination does not provide information");
9279
9280     if (AssertVT.bitsLT(BigA_AssertVT)) {
9281       SDLoc DL(N);
9282       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9283                                       BigA.getOperand(0), N1);
9284       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9285     }
9286   }
9287
9288   return SDValue();
9289 }
9290
9291 /// If the result of a wider load is shifted right by N bits and then
9292 /// truncated to a narrower type, where N is a multiple of the number of bits
9293 /// of the narrower type, transform it to a narrower load from address + N / num
9294 /// of bits of new type. Also narrow the load if the result is masked with an
9295 /// AND to effectively produce a smaller type. If the result is to be extended,
9296 /// also fold the extension to form an extending load.
     ///
     /// \p N is special-cased when it is a SIGN_EXTEND_INREG, SRL or AND; any
     /// other node is treated as narrowing its operand 0 to N's value type.
     /// In the code below the pointer is advanced by ShAmt / 8 bytes.
     ///
     /// \returns the narrowed load (shifted left where required), or an empty
     /// SDValue when the transformation does not apply.
9297 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9298   unsigned Opc = N->getOpcode();
9299
9300   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9301   SDValue N0 = N->getOperand(0);
9302   EVT VT = N->getValueType(0);
       // ExtVT is the type passed as the memory type of the narrowed load; VT
       // stays the type of the value the load produces.
9303   EVT ExtVT = VT;
9304
9305   // This transformation isn't valid for vector loads.
9306   if (VT.isVector())
9307     return SDValue();
9308
       // ShAmt is the bit offset of the wanted field within the loaded value.
       // HasShiftedOffset records that the field came from a shifted AND mask,
       // so the narrow load has to be shifted back into place at the end.
9309   unsigned ShAmt = 0;
9310   bool HasShiftedOffset = false;
9311   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9312   // extended to VT.
9313   if (Opc == ISD::SIGN_EXTEND_INREG) {
9314     ExtType = ISD::SEXTLOAD;
9315     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9316   } else if (Opc == ISD::SRL) {
9317     // Another special-case: SRL is basically zero-extending a narrower value,
9318     // or it maybe shifting a higher subword, half or byte into the lowest
9319     // bits.
9320     ExtType = ISD::ZEXTLOAD;
         // Make N0 refer to the SRL itself so that the SRL handling further
         // down picks it up.
9321     N0 = SDValue(N, 0);
9322
9323     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9324     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9325     if (!N01 || !LN0)
9326       return SDValue();
9327
         // The narrow type holds the bits that survive the shift: counted from
         // the memory width when the load is not sign-extending and is wider
         // than the shift amount, otherwise counted from VT's width.
9328     uint64_t ShiftAmt = N01->getZExtValue();
9329     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9330     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9331       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9332     else
9333       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9334                                 VT.getSizeInBits() - ShiftAmt);
9335   } else if (Opc == ISD::AND) {
9336     // An AND with a constant mask is the same as a truncate + zero-extend.
9337     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9338     if (!AndC)
9339       return SDValue();
9340
         // A low-bit mask selects ActiveBits bits at offset 0. A shifted mask
         // selects ActiveBits bits starting at its trailing-zero count.
9341     const APInt &Mask = AndC->getAPIntValue();
9342     unsigned ActiveBits = 0;
9343     if (Mask.isMask()) {
9344       ActiveBits = Mask.countTrailingOnes();
9345     } else if (Mask.isShiftedMask()) {
9346       ShAmt = Mask.countTrailingZeros();
9347       APInt ShiftedMask = Mask.lshr(ShAmt);
9348       ActiveBits = ShiftedMask.countTrailingOnes();
9349       HasShiftedOffset = true;
9350     } else
9351       return SDValue();
9352
9353     ExtType = ISD::ZEXTLOAD;
9354     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9355   }
9356
9357   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9358     SDValue SRL = N0;
9359     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
           // NOTE(review): this overwrites any ShAmt recorded from a shifted
           // AND mask above - confirm that combination is intended.
9360       ShAmt = ConstShift->getZExtValue();
9361       unsigned EVTBits = ExtVT.getSizeInBits();
9362       // Is the shift amount a multiple of size of VT?
           // NOTE(review): the bit-mask test equals a divisibility test only
           // when EVTBits is a power of two.
9363       if ((ShAmt & (EVTBits-1)) == 0) {
9364         N0 = N0.getOperand(0);
9365         // Is the load width a multiple of size of VT?
9366         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9367           return SDValue();
9368       }
9369
9370       // At this point, we must have a load or else we can't do the transform.
           // N0 is still the SRL if the multiple-of check above failed, so this
           // also rejects that case.
9371       if (!isa<LoadSDNode>(N0)) return SDValue();
9372
9373       auto *LN0 = cast<LoadSDNode>(N0);
9374
9375       // Because a SRL must be assumed to *need* to zero-extend the high bits
9376       // (as opposed to anyext the high bits), we can't combine the zextload
9377       // lowering of SRL and an sextload.
9378       if (LN0->getExtensionType() == ISD::SEXTLOAD)
9379         return SDValue();
9380
9381       // If the shift amount is larger than the input type then we're not
9382       // accessing any of the loaded bytes.  If the load was a zextload/extload
9383       // then the result of the shift+trunc is zero/undef (handled elsewhere).
9384       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9385         return SDValue();
9386
9387       // If the SRL is only used by a masking AND, we may be able to adjust
9388       // the ExtVT to make the AND redundant.
           // SRL has exactly one use (checked above), so use_begin() is its
           // only user.
9389       SDNode *Mask = *(SRL->use_begin());
9390       if (Mask->getOpcode() == ISD::AND &&
9391           isa<ConstantSDNode>(Mask->getOperand(1))) {
9392         const APInt &ShiftMask =
9393           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9394         if (ShiftMask.isMask()) {
9395           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9396                                            ShiftMask.countTrailingOnes());
9397           // If the mask is smaller, recompute the type.
9398           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9399               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9400             ExtVT = MaskedVT;
9401         }
9402       }
9403     }
9404   }
9405
9406   // If the load is shifted left (and the result isn't shifted back right),
9407   // we can fold the truncate through the shift.
       // Only considered when no right shift was found (ShAmt == 0), the narrow
       // type is VT itself, and the target reports narrowing as profitable.
9408   unsigned ShLeftAmt = 0;
9409   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9410       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9411     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9412       ShLeftAmt = N01->getZExtValue();
9413       N0 = N0.getOperand(0);
9414     }
9415   }
9416
9417   // If we haven't found a load, we can't narrow it.
9418   if (!isa<LoadSDNode>(N0))
9419     return SDValue();
9420
9421   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9422   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9423     return SDValue();
9424
       // Maps a bit offset to (store size of the original memory type) -
       // (store size of ExtVT) - offset. Applying it twice returns the original
       // offset.
9425   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9426     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9427     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9428     return LVTStoreBits - EVTStoreBits - ShAmt;
9429   };
9430
9431   // For big endian targets, we need to adjust the offset to the pointer to
9432   // load the correct bytes.
9433   if (DAG.getDataLayout().isBigEndian())
9434     ShAmt = AdjustBigEndianShift(ShAmt);
9435
       // Convert the bit offset to a byte offset and derive the new load's
       // alignment from the original alignment and that offset.
9436   EVT PtrType = N0.getOperand(1).getValueType();
9437   uint64_t PtrOff = ShAmt / 8;
9438   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9439   SDLoc DL(LN0);
9440   // The original load itself didn't wrap, so an offset within it doesn't.
9441   SDNodeFlags Flags;
9442   Flags.setNoUnsignedWrap(true);
9443   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9444                                PtrType, LN0->getBasePtr(),
9445                                DAG.getConstant(PtrOff, DL, PtrType),
9446                                Flags);
9447   AddToWorklist(NewPtr.getNode());
9448
       // Build the narrow load: a plain load of VT, or an extending load that
       // reads ExtVT from memory and produces VT.
9449   SDValue Load;
9450   if (ExtType == ISD::NON_EXTLOAD)
9451     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9452                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9453                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9454   else
9455     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9456                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9457                           NewAlign, LN0->getMemOperand()->getFlags(),
9458                           LN0->getAAInfo());
9459
9460   // Replace the old load's chain with the new load's chain.
       // NOTE(review): DeadNodes presumably keeps the combiner worklist in sync
       // with nodes deleted by the replacement - confirm against
       // WorklistRemover.
9461   WorklistRemover DeadNodes(*this);
9462   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9463
9464   // Shift the result left, if we've swallowed a left shift.
9465   SDValue Result = Load;
9466   if (ShLeftAmt != 0) {
         // Use the target's shift-amount type, falling back to VT if ShLeftAmt
         // does not fit in it.
9467     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9468     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9469       ShImmTy = VT;
9470     // If the shift amount is as large as the result size (but, presumably,
9471     // no larger than the source) then the useful bits of the result are
9472     // zero; we can't simply return the shortened shift, because the result
9473     // of that operation is undefined.
         // This DL shadows the outer one only within this block.
9474     SDLoc DL(N0);
9475     if (ShLeftAmt >= VT.getSizeInBits())
9476       Result = DAG.getConstant(0, DL, VT);
9477     else
9478       Result = DAG.getNode(ISD::SHL, DL, VT,
9479                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9480   }
9481
9482   if (HasShiftedOffset) {
9483     // Recalculate the shift amount after it has been altered to calculate
9484     // the offset.
         // A second application of AdjustBigEndianShift undoes the first.
9485     if (DAG.getDataLayout().isBigEndian())
9486       ShAmt = AdjustBigEndianShift(ShAmt);
9487
9488     // We're using a shifted mask, so the load now has an offset. This means
9489     // that data has been loaded into the lower bytes than it would have been
9490     // before, so we need to shl the loaded data into the correct position in the
9491     // register.
         // DL here is the outer SDLoc built from LN0. N's uses are replaced
         // with the shifted value directly, in addition to returning it.
9492     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9493     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9494     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9495   }
9496
9497   // Return the new loaded value.
9498   return Result;
9499 }
9500
9501 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9502   SDValue N0 = N->getOperand(0);
9503   SDValue N1 = N->getOperand(1);
9504   EVT VT = N->getValueType(0);
9505   EVT EVT = cast<VTSDNode>(N1)->getVT();
9506   unsigned VTBits = VT.getScalarSizeInBits();
9507   unsigned EVTBits = EVT.getScalarSizeInBits();
9508
9509   if (N0.isUndef())
9510     return DAG.getUNDEF(VT);
9511
9512   // fold (sext_in_reg c1) -> c1
9513   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9514     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9515
9516   // If the input is already sign extended, just drop the extension.
9517   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9518     return N0;
9519
9520   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9521   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9522       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9523     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9524                        N0.getOperand(0), N1);
9525
9526   // fold (sext_in_reg (sext x)) -> (sext x)
9527   // fold (sext_in_reg (aext x)) -> (sext x)
9528   // if x is small enough or if we know that x has more than 1 sign bit and the
9529   // sign_extend_inreg is extending from one of them.
9530   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9531     SDValue N00 = N0.getOperand(0);
9532     unsigned N00Bits = N00.getScalarValueSizeInBits();
9533     if ((N00Bits <= EVTBits ||
9534          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9535         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9536       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9537   }
9538
9539   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9540   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9541        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9542        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9543       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9544     if (!LegalOperations ||
9545         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9546       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9547                          N0.getOperand(0));
9548   }
9549
9550   // fold (sext_in_reg (zext x)) -> (sext x)
9551   // iff we are extending the source sign bit.
9552   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9553     SDValue N00 = N0.getOperand(0);
9554     if (N00.getScalarValueSizeInBits() == EVTBits &&
9555         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9556       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9557   }
9558
9559   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9560   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9561     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9562
9563   // fold operands of sext_in_reg based on knowledge that the top bits are not
9564   // demanded.
9565   if (SimplifyDemandedBits(SDValue(N, 0)))
9566     return SDValue(N, 0);
9567
9568   // fold (sext_in_reg (load x)) -> (smaller sextload x)
9569   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9570   if (SDValue NarrowLoad = ReduceLoadWidth(N))
9571     return NarrowLoad;
9572
9573   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9574   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9575   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9576   if (N0.getOpcode() == ISD::SRL) {
9577     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9578       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9579         // We can turn this into an SRA iff the input to the SRL is already sign
9580         // extended enough.
9581         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9582         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9583           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9584                              N0.getOperand(0), N0.getOperand(1));
9585       }
9586   }
9587
9588   // fold (sext_inreg (extload x)) -> (sextload x)
9589   // If sextload is not supported by target, we can only do the combine when
9590   // load has one use. Doing otherwise can block folding the extload with other
9591   // extends that the target does support.
9592   if (ISD::isEXTLoad(N0.getNode()) &&
9593       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9594       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9595       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9596         N0.hasOneUse()) ||
9597        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9598     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9599     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9600                                      LN0->getChain(),
9601                                      LN0->getBasePtr(), EVT,
9602                                      LN0->getMemOperand());
9603     CombineTo(N, ExtLoad);
9604     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9605     AddToWorklist(ExtLoad.getNode());
9606     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9607   }
9608   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9609   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9610       N0.hasOneUse() &&
9611       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9612       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9613        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9614     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9615     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9616                                      LN0->getChain(),
9617                                      LN0->getBasePtr(), EVT,
9618                                      LN0->getMemOperand());
9619     CombineTo(N, ExtLoad);
9620     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9621     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9622   }
9623
9624   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9625   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9626     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9627                                            N0.getOperand(1), false))
9628       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9629                          BSwap, N1);
9630   }
9631
9632   return SDValue();
9633 }
9634
9635 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9636   SDValue N0 = N->getOperand(0);
9637   EVT VT = N->getValueType(0);
9638
9639   if (N0.isUndef())
9640     return DAG.getUNDEF(VT);
9641
9642   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9643     return Res;
9644
9645   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9646     return SDValue(N, 0);
9647
9648   return SDValue();
9649 }
9650
9651 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9652   SDValue N0 = N->getOperand(0);
9653   EVT VT = N->getValueType(0);
9654
9655   if (N0.isUndef())
9656     return DAG.getUNDEF(VT);
9657
9658   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9659     return Res;
9660
9661   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9662     return SDValue(N, 0);
9663
9664   return SDValue();
9665 }
9666
9667 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       // Combine the TRUNCATE node N. The folds below are attempted in order and
       // the first one that applies determines the result: a replacement value,
       // SDValue(N, 0) when SimplifyDemandedBits reports a change, or an empty
       // SDValue when nothing applies.
9668   SDValue N0 = N->getOperand(0);
9669   EVT VT = N->getValueType(0);
       // Endianness decides which narrow vector element holds the low bits in
       // the extract_vector_elt folds further down.
9670   bool isLE = DAG.getDataLayout().isLittleEndian();
9671
9672   // noop truncate
9673   if (N0.getValueType() == N->getValueType(0))
9674     return N0;
9675
9676   // fold (truncate (truncate x)) -> (truncate x)
9677   if (N0.getOpcode() == ISD::TRUNCATE)
9678     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9679
9680   // fold (truncate c1) -> c1
9681   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9682     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
         // getNode may hand back N itself; only report a result when it produced
         // a different node.
9683     if (C.getNode() != N)
9684       return C;
9685   }
9686
9687   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9688   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9689       N0.getOpcode() == ISD::SIGN_EXTEND ||
9690       N0.getOpcode() == ISD::ANY_EXTEND) {
9691     // if the source is smaller than the dest, we still need an extend.
9692     if (N0.getOperand(0).getValueType().bitsLT(VT))
9693       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9694     // if the source is larger than the dest, then we just need the truncate.
9695     if (N0.getOperand(0).getValueType().bitsGT(VT))
9696       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9697     // if the source and dest are the same type, we can drop both the extend
9698     // and the truncate.
9699     return N0.getOperand(0);
9700   }
9701
9702   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9703   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9704     return SDValue();
9705
9706   // Fold extract-and-trunc into a narrow extract. For example:
9707   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9708   //   i32 y = TRUNCATE(i64 x)
9709   //        -- becomes --
9710   //   v16i8 b = BITCAST (v2i64 val)
9711   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9712   //
9713   // Note: We only run this optimization after type legalization (which often
9714   // creates this pattern) and before operation legalization after which
9715   // we need to be more careful about the vector instructions that we generate.
9716   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9717       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9718     EVT VecTy = N0.getOperand(0).getValueType();
9719     EVT ExTy = N0.getValueType();
9720     EVT TrTy = N->getValueType(0);
9721
9722     unsigned NumElem = VecTy.getVectorNumElements();
9723     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9724
         // NVT reinterprets the source vector as SizeRatio times as many elements
         // of the truncated type.
9725     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9726     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9727
9728     SDValue EltNo = N0->getOperand(1);
9729     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9730       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9731       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
           // Little endian uses the first narrow element of the wide element's
           // group, big endian the last one.
9732       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9733
9734       SDLoc DL(N);
9735       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9736                          DAG.getBitcast(NVT, N0.getOperand(0)),
9737                          DAG.getConstant(Index, DL, IndexTy));
9738     }
9739   }
9740
9741   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9742   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9743     EVT SrcVT = N0.getValueType();
9744     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9745         TLI.isTruncateFree(SrcVT, VT)) {
9746       SDLoc SL(N0);
9747       SDValue Cond = N0.getOperand(0);
9748       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9749       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9750       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9751     }
9752   }
9753
9754   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9755   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9756       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9757       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9758     SDValue Amt = N0.getOperand(1);
9759     KnownBits Known = DAG.computeKnownBits(Amt);
9760     unsigned Size = VT.getScalarSizeInBits();
         // The bits of Amt not known to be leading zeros number at most
         // Log2_32(Size), so the shift amount is known to be smaller than Size.
9761     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9762       SDLoc SL(N);
9763       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9764
9765       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
           // Convert the shift amount to the shift-amount type of the narrow VT
           // when the two differ.
9766       if (AmtVT != Amt.getValueType()) {
9767         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9768         AddToWorklist(Amt.getNode());
9769       }
9770       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9771     }
9772   }
9773
9774   // Fold a series of buildvector, bitcast, and truncate if possible.
9775   // For example fold
9776   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9777   //   (2xi32 (buildvector x, y)).
9778   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9779       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9780       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9781       N0.getOperand(0).hasOneUse()) {
9782     SDValue BuildVect = N0.getOperand(0);
9783     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9784     EVT TruncVecEltTy = VT.getVectorElementType();
9785
9786     // Check that the element types match.
9787     if (BuildVectEltTy == TruncVecEltTy) {
9788       // Now we only need to compute the offset of the truncated elements.
9789       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
9790       unsigned TruncVecNumElts = VT.getVectorNumElements();
9791       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9792
9793       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9794              "Invalid number of elements");
9795
           // Keep every TruncEltOffset-th operand, starting with operand 0.
9796       SmallVector<SDValue, 8> Opnds;
9797       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9798         Opnds.push_back(BuildVect.getOperand(i));
9799
9800       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9801     }
9802   }
9803
9804   // See if we can simplify the input to this truncate through knowledge that
9805   // only the low bits are being used.
9806   // For example "trunc (or (shl x, 8), y)" -> "trunc y"
9807   // Currently we only perform this optimization on scalars because vectors
9808   // may have different active low bits.
9809   if (!VT.isVector()) {
9810     APInt Mask =
9811         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9812     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9813       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9814   }
9815
9816   // fold (truncate (load x)) -> (smaller load x)
9817   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9818   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9819     if (SDValue Reduced = ReduceLoadWidth(N))
9820       return Reduced;
9821
9822     // Handle the case where the load remains an extending load even
9823     // after truncation.
9824     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9825       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9826       if (!LN0->isVolatile() &&
9827           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9828         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9829                                          VT, LN0->getChain(), LN0->getBasePtr(),
9830                                          LN0->getMemoryVT(),
9831                                          LN0->getMemOperand());
             // Redirect users of the old load's chain result to the new load.
9832         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9833         return NewLoad;
9834       }
9835     }
9836   }
9837
9838   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
9839   // where ... are all 'undef'.
9840   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9841     SmallVector<EVT, 8> VTs;
9842     SDValue V;
9843     unsigned Idx = 0;
9844     unsigned NumDefs = 0;
9845
         // Record the truncated type of each operand, remembering the (single)
         // non-undef operand in V and its position in Idx.
9846     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9847       SDValue X = N0.getOperand(i);
9848       if (!X.isUndef()) {
9849         V = X;
9850         Idx = i;
9851         NumDefs++;
9852       }
9853       // Stop if more than one member is non-undef.
9854       if (NumDefs > 1)
9855         break;
9856       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9857                                      VT.getVectorElementType(),
9858                                      X.getValueType().getVectorNumElements()));
9859     }
9860
         // Every operand is undef, so the truncate is undef as well.
9861     if (NumDefs == 0)
9862       return DAG.getUNDEF(VT);
9863
9864     if (NumDefs == 1) {
9865       assert(V.getNode() && "The single defined operand is empty!");
9866       SmallVector<SDValue, 8> Opnds;
9867       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9868         if (i != Idx) {
9869           Opnds.push_back(DAG.getUNDEF(VTs[i]));
9870           continue;
9871         }
9872         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9873         AddToWorklist(NV.getNode());
9874         Opnds.push_back(NV);
9875       }
9876       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9877     }
9878   }
9879
9880   // Fold truncate of a bitcast of a vector to an extract of the low vector
9881   // element.
9882   //
9883   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9884   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9885     SDValue VecSrc = N0.getOperand(0);
9886     EVT SrcVT = VecSrc.getValueType();
9887     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9888         (!LegalOperations ||
9889          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9890       SDLoc SL(N);
9891
9892       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
           // Element 0 is extracted on little endian, the last element on big
           // endian.
9893       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9894       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9895                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9896     }
9897   }
9898
9899   // Simplify the operands using demanded-bits information.
9900   if (!VT.isVector() &&
9901       SimplifyDemandedBits(SDValue(N, 0)))
9902     return SDValue(N, 0);
9903
9904   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9905   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9906   // When the adde's carry is not used.
9907   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9908       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9909       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9910     SDLoc SL(N);
9911     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9912     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
         // The narrowed node keeps the original type for its second result.
9913     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9914     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9915   }
9916
9917   // fold (truncate (extract_subvector(ext x))) ->
9918   //      (extract_subvector x)
9919   // TODO: This can be generalized to cover cases where the truncate and extract
9920   // do not fully cancel each other out.
9921   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9922     SDValue N00 = N0.getOperand(0);
9923     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9924         N00.getOpcode() == ISD::ZERO_EXTEND ||
9925         N00.getOpcode() == ISD::ANY_EXTEND) {
9926       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9927           VT.getVectorElementType())
9928         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9929                            N00.getOperand(0), N0.getOperand(1));
9930     }
9931   }
9932
9933   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9934     return NewVSel;
9935
9936   // Narrow a suitable binary operation with a non-opaque constant operand by
9937   // moving it ahead of the truncate. This is limited to pre-legalization
9938   // because targets may prefer a wider type during later combines and invert
9939   // this transform.
       // The switch has no default: any other opcode falls through to the final
       // return below.
9940   switch (N0.getOpcode()) {
9941   case ISD::ADD:
9942   case ISD::SUB:
9943   case ISD::MUL:
9944   case ISD::AND:
9945   case ISD::OR:
9946   case ISD::XOR:
9947     if (!LegalOperations && N0.hasOneUse() &&
9948         (isConstantOrConstantVector(N0.getOperand(0), true) ||
9949          isConstantOrConstantVector(N0.getOperand(1), true))) {
9950       // TODO: We already restricted this to pre-legalization, but for vectors
9951       // we are extra cautious to not create an unsupported operation.
9952       // Target-specific changes are likely needed to avoid regressions here.
9953       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
9954         SDLoc DL(N);
9955         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
9956         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
9957         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
9958       }
9959     }
9960   }
9961
9962   return SDValue();
9963 }
9964
9965 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9966   SDValue Elt = N->getOperand(i);
9967   if (Elt.getOpcode() != ISD::MERGE_VALUES)
9968     return Elt.getNode();
9969   return Elt.getOperand(Elt.getResNo()).getNode();
9970 }
9971
9972 /// build_pair (load, load) -> load
9973 /// if load locations are consecutive.
9974 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
9975   assert(N->getOpcode() == ISD::BUILD_PAIR);
9976
9977   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
9978   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
9979
9980   // A BUILD_PAIR is always having the least significant part in elt 0 and the
9981   // most significant part in elt 1. So when combining into one large load, we
9982   // need to consider the endianness.
9983   if (DAG.getDataLayout().isBigEndian())
9984     std::swap(LD1, LD2);
9985
9986   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
9987       LD1->getAddressSpace() != LD2->getAddressSpace())
9988     return SDValue();
9989   EVT LD1VT = LD1->getValueType(0);
9990   unsigned LD1Bytes = LD1VT.getStoreSize();
9991   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
9992       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
9993     unsigned Align = LD1->getAlignment();
9994     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
9995         VT.getTypeForEVT(*DAG.getContext()));
9996
9997     if (NewAlign <= Align &&
9998         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
9999       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10000                          LD1->getPointerInfo(), Align);
10001   }
10002
10003   return SDValue();
10004 }
10005
10006 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10007   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10008   // and Lo parts; on big-endian machines it doesn't.
10009   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10010 }
10011
10012 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10013                                     const TargetLowering &TLI) {
10014   // If this is not a bitcast to an FP type or if the target doesn't have
10015   // IEEE754-compliant FP logic, we're done.
10016   EVT VT = N->getValueType(0);
10017   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10018     return SDValue();
10019
10020   // TODO: Handle cases where the integer constant is a different scalar
10021   // bitwidth to the FP.
10022   SDValue N0 = N->getOperand(0);
10023   EVT SourceVT = N0.getValueType();
10024   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10025     return SDValue();
10026
10027   unsigned FPOpcode;
10028   APInt SignMask;
10029   switch (N0.getOpcode()) {
10030   case ISD::AND:
10031     FPOpcode = ISD::FABS;
10032     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10033     break;
10034   case ISD::XOR:
10035     FPOpcode = ISD::FNEG;
10036     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10037     break;
10038   case ISD::OR:
10039     FPOpcode = ISD::FABS;
10040     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10041     break;
10042   default:
10043     return SDValue();
10044   }
10045
10046   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10047   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10048   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10049   //   fneg (fabs X)
10050   SDValue LogicOp0 = N0.getOperand(0);
10051   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10052   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10053       LogicOp0.getOpcode() == ISD::BITCAST &&
10054       LogicOp0.getOperand(0).getValueType() == VT) {
10055     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10056     NumFPLogicOpsConv++;
10057     if (N0.getOpcode() == ISD::OR)
10058       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10059     return FPOp;
10060   }
10061
10062   return SDValue();
10063 }
10064
10065 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10066   SDValue N0 = N->getOperand(0);
10067   EVT VT = N->getValueType(0);
10068
10069   if (N0.isUndef())
10070     return DAG.getUNDEF(VT);
10071
10072   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
10073   // Only do this before legalize types, since we might create an illegal
10074   // scalar type. Even if we knew we wouldn't create an illegal scalar type
10075   // we can only do this before legalize ops, since the target maybe
10076   // depending on the bitcast.
10077   // First check to see if this is all constant.
10078   if (!LegalTypes &&
10079       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10080       VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
10081     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10082                                              VT.getVectorElementType());
10083
10084   // If the input is a constant, let getNode fold it.
10085   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
10086     // If we can't allow illegal operations, we need to check that this is just
10087     // a fp -> int or int -> fp conversion and that the resulting operation will
10088     // be legal.
10089     if (!LegalOperations ||
10090         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10091          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10092         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10093          TLI.isOperationLegal(ISD::Constant, VT))) {
10094       SDValue C = DAG.getBitcast(VT, N0);
10095       if (C.getNode() != N)
10096         return C;
10097     }
10098   }
10099
10100   // (conv (conv x, t1), t2) -> (conv x, t2)
10101   if (N0.getOpcode() == ISD::BITCAST)
10102     return DAG.getBitcast(VT, N0.getOperand(0));
10103
10104   // fold (conv (load x)) -> (load (conv*)x)
10105   // If the resultant load doesn't need a higher alignment than the original!
10106   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10107       // Do not remove the cast if the types differ in endian layout.
10108       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10109           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10110       // If the load is volatile, we only want to change the load type if the
10111       // resulting load is legal. Otherwise we might increase the number of
10112       // memory accesses. We don't care if the original type was legal or not
10113       // as we assume software couldn't rely on the number of accesses of an
10114       // illegal type.
10115       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10116        TLI.isOperationLegal(ISD::LOAD, VT)) &&
10117       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
10118     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10119     unsigned OrigAlign = LN0->getAlignment();
10120
10121     bool Fast = false;
10122     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10123                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
10124         Fast) {
10125       SDValue Load =
10126           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10127                       LN0->getPointerInfo(), OrigAlign,
10128                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10129       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10130       return Load;
10131     }
10132   }
10133
10134   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10135     return V;
10136
10137   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10138   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10139   //
10140   // For ppc_fp128:
10141   // fold (bitcast (fneg x)) ->
10142   //     flipbit = signbit
10143   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10144   //
10145   // fold (bitcast (fabs x)) ->
10146   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
10147   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10148   // This often reduces constant pool loads.
10149   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10150        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10151       N0.getNode()->hasOneUse() && VT.isInteger() &&
10152       !VT.isVector() && !N0.getValueType().isVector()) {
10153     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10154     AddToWorklist(NewConv.getNode());
10155
10156     SDLoc DL(N);
10157     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10158       assert(VT.getSizeInBits() == 128);
10159       SDValue SignBit = DAG.getConstant(
10160           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10161       SDValue FlipBit;
10162       if (N0.getOpcode() == ISD::FNEG) {
10163         FlipBit = SignBit;
10164         AddToWorklist(FlipBit.getNode());
10165       } else {
10166         assert(N0.getOpcode() == ISD::FABS);
10167         SDValue Hi =
10168             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10169                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10170                                               SDLoc(NewConv)));
10171         AddToWorklist(Hi.getNode());
10172         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10173         AddToWorklist(FlipBit.getNode());
10174       }
10175       SDValue FlipBits =
10176           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10177       AddToWorklist(FlipBits.getNode());
10178       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10179     }
10180     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10181     if (N0.getOpcode() == ISD::FNEG)
10182       return DAG.getNode(ISD::XOR, DL, VT,
10183                          NewConv, DAG.getConstant(SignBit, DL, VT));
10184     assert(N0.getOpcode() == ISD::FABS);
10185     return DAG.getNode(ISD::AND, DL, VT,
10186                        NewConv, DAG.getConstant(~SignBit, DL, VT));
10187   }
10188
10189   // fold (bitconvert (fcopysign cst, x)) ->
10190   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
10191   // Note that we don't handle (copysign x, cst) because this can always be
10192   // folded to an fneg or fabs.
10193   //
10194   // For ppc_fp128:
10195   // fold (bitcast (fcopysign cst, x)) ->
10196   //     flipbit = (and (extract_element
10197   //                     (xor (bitcast cst), (bitcast x)), 0),
10198   //                    signbit)
10199   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
10200   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10201       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10202       VT.isInteger() && !VT.isVector()) {
10203     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10204     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10205     if (isTypeLegal(IntXVT)) {
10206       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10207       AddToWorklist(X.getNode());
10208
10209       // If X has a different width than the result/lhs, sext it or truncate it.
10210       unsigned VTWidth = VT.getSizeInBits();
10211       if (OrigXWidth < VTWidth) {
10212         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10213         AddToWorklist(X.getNode());
10214       } else if (OrigXWidth > VTWidth) {
10215         // To get the sign bit in the right place, we have to shift it right
10216         // before truncating.
10217         SDLoc DL(X);
10218         X = DAG.getNode(ISD::SRL, DL,
10219                         X.getValueType(), X,
10220                         DAG.getConstant(OrigXWidth-VTWidth, DL,
10221                                         X.getValueType()));
10222         AddToWorklist(X.getNode());
10223         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10224         AddToWorklist(X.getNode());
10225       }
10226
10227       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10228         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10229         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10230         AddToWorklist(Cst.getNode());
10231         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10232         AddToWorklist(X.getNode());
10233         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10234         AddToWorklist(XorResult.getNode());
10235         SDValue XorResult64 = DAG.getNode(
10236             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10237             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10238                                   SDLoc(XorResult)));
10239         AddToWorklist(XorResult64.getNode());
10240         SDValue FlipBit =
10241             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10242                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10243         AddToWorklist(FlipBit.getNode());
10244         SDValue FlipBits =
10245             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10246         AddToWorklist(FlipBits.getNode());
10247         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10248       }
10249       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10250       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10251                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
10252       AddToWorklist(X.getNode());
10253
10254       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10255       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10256                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10257       AddToWorklist(Cst.getNode());
10258
10259       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10260     }
10261   }
10262
10263   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10264   if (N0.getOpcode() == ISD::BUILD_PAIR)
10265     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10266       return CombineLD;
10267
10268   // Remove double bitcasts from shuffles - this is often a legacy of
10269   // XformToShuffleWithZero being used to combine bitmaskings (of
10270   // float vectors bitcast to integer vectors) into shuffles.
10271   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10272   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10273       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
10274       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10275       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10276     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10277
10278     // If operands are a bitcast, peek through if it casts the original VT.
10279     // If operands are a constant, just bitcast back to original VT.
10280     auto PeekThroughBitcast = [&](SDValue Op) {
10281       if (Op.getOpcode() == ISD::BITCAST &&
10282           Op.getOperand(0).getValueType() == VT)
10283         return SDValue(Op.getOperand(0));
10284       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10285           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10286         return DAG.getBitcast(VT, Op);
10287       return SDValue();
10288     };
10289
10290     // FIXME: If either input vector is bitcast, try to convert the shuffle to
10291     // the result type of this bitcast. This would eliminate at least one
10292     // bitcast. See the transform in InstCombine.
10293     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10294     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10295     if (!(SV0 && SV1))
10296       return SDValue();
10297
10298     int MaskScale =
10299         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10300     SmallVector<int, 8> NewMask;
10301     for (int M : SVN->getMask())
10302       for (int i = 0; i != MaskScale; ++i)
10303         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
10304
10305     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10306     if (!LegalMask) {
10307       std::swap(SV0, SV1);
10308       ShuffleVectorSDNode::commuteMask(NewMask);
10309       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10310     }
10311
10312     if (LegalMask)
10313       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10314   }
10315
10316   return SDValue();
10317 }
10318
10319 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
        // A BUILD_PAIR has no combine of its own here: hand the node, together
        // with its own result type, to CombineConsecutiveLoads and return
        // whatever that helper produces (presumably an empty SDValue when the
        // two halves cannot be merged -- confirm against the helper).
10320   SDValue MergedLoad = CombineConsecutiveLoads(N, N->getValueType(0));
10321   return MergedLoad;
10322 }
10323
10324 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10325 /// operands. DstEltVT indicates the destination element value type.
      ///
      /// Rebuilds BV as a build_vector whose elements have type DstEltVT. Four
      /// situations are handled, in this order:
      ///   1. Source and destination element types are identical: BV itself is
      ///      returned.
      ///   2. Element widths are equal: every operand is bitcast individually.
      ///   3. Either side is floating point with a differing width: the vector
      ///      is first routed through an integer vector of matching element
      ///      width (by recursion) so only integer resizing remains.
      ///   4. Integer elements of differing width: several narrow inputs are
      ///      packed into one wide output, or one wide input is split into
      ///      several narrow outputs, honouring the target's endianness.
      ///
      /// \param BV       The build_vector node to convert.
      /// \param DstEltVT The element type of the vector to produce.
      /// \returns The converted build_vector (or BV itself in case 1).
      ///
      /// NOTE(review): the resizing paths use integer division of the element
      /// widths and step through the operands in fixed-size groups, so they
      /// appear to rely on one width being an exact multiple of the other and
      /// on the operand count matching -- confirm that callers guarantee this.
10326 SDValue DAGCombiner::
10327 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10328   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10329
10330   // If this is already the right type, we're done.
10331   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10332
10333   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10334   unsigned DstBitSize = DstEltVT.getSizeInBits();
10335
10336   // If this is a conversion of N elements of one type to N elements of another
10337   // type, convert each element.  This handles FP<->INT cases.
10338   if (SrcBitSize == DstBitSize) {
10339     SmallVector<SDValue, 8> Ops;
10340     for (SDValue Op : BV->op_values()) {
10341       // If the vector element type is not legal, the BUILD_VECTOR operands
10342       // are promoted and implicitly truncated.  Make that explicit here.
10343       if (Op.getValueType() != SrcEltVT)
10344         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10345       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
              // Queue the freshly created per-element bitcast for combining.
10346       AddToWorklist(Ops.back().getNode());
10347     }
            // Same element count as the input, new element type.
10348     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10349                               BV->getValueType(0).getVectorNumElements());
10350     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10351   }
10352
10353   // Otherwise, we're growing or shrinking the elements.  To avoid having to
10354   // handle annoying details of growing/shrinking FP values, we convert them to
10355   // int first.
10356   if (SrcEltVT.isFloatingPoint()) {
10357     // Convert the input float vector to a int vector where the elements are the
10358     // same sizes.
10359     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
            // The recursive call takes the equal-width path above; from here on BV
            // refers to the integer-typed replacement vector.
10360     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10361     SrcEltVT = IntVT;
10362   }
10363
10364   // Now we know the input is an integer vector.  If the output is a FP type,
10365   // convert to integer first, then to FP of the right size.
10366   if (DstEltVT.isFloatingPoint()) {
10367     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10368     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10369
10370     // Next, convert to FP elements of the same size.
10371     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10372   }
10373
10374   SDLoc DL(BV);
10375
10376   // Okay, we know the src/dst types are both integers of differing types.
10377   // Handle growing first.
10378   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10379   if (SrcBitSize < DstBitSize) {
10380     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10381
10382     SmallVector<SDValue, 8> Ops;
            // Consume the operands in groups of NumInputsPerOutput; each group
            // becomes one wide output element.
10383     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10384          i += NumInputsPerOutput) {
10385       bool isLE = DAG.getDataLayout().isLittleEndian();
10386       APInt NewBits = APInt(DstBitSize, 0);
10387       bool EltIsUndef = true;
10388       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10389         // Shift the previously computed bits over.
10390         NewBits <<= SrcBitSize;
                // On little-endian targets walk the group from its highest index
                // down, so the lowest-indexed input ends up in the least
                // significant bits; on big-endian targets walk it in order, so
                // the lowest-indexed input ends up in the most significant bits.
10391         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
                // An undef input leaves its slot as zero bits.
10392         if (Op.isUndef()) continue;
10393         EltIsUndef = false;
10394
                // cast<> requires every non-undef operand to be a ConstantSDNode.
                // The value is first normalised to SrcBitSize bits, then widened.
10395         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10396                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
10397       }
10398
              // The output is undef only when every input of the group was undef.
10399       if (EltIsUndef)
10400         Ops.push_back(DAG.getUNDEF(DstEltVT));
10401       else
10402         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10403     }
10404
10405     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10406     return DAG.getBuildVector(VT, DL, Ops);
10407   }
10408
10409   // Finally, this must be the case where we are shrinking elements: each input
10410   // turns into multiple outputs.
10411   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10412   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10413                             NumOutputsPerInput*BV->getNumOperands());
10414   SmallVector<SDValue, 8> Ops;
10415
10416   for (const SDValue &Op : BV->op_values()) {
          // An undef input expands to NumOutputsPerInput undef outputs.
10417     if (Op.isUndef()) {
10418       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10419       continue;
10420     }
10421
10422     APInt OpVal = cast<ConstantSDNode>(Op)->
10423                   getAPIntValue().zextOrTrunc(SrcBitSize);
10424
            // Peel off DstBitSize bits at a time, least significant piece first.
10425     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10426       APInt ThisVal = OpVal.trunc(DstBitSize);
10427       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10428       OpVal.lshrInPlace(DstBitSize);
10429     }
10430
10431     // For big endian targets, swap the order of the pieces of each element.
10432     if (DAG.getDataLayout().isBigEndian())
10433       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10434   }
10435
10436   return DAG.getBuildVector(VT, DL, Ops);
10437 }
10438
10439 static bool isContractable(SDNode *N) {
        // A node may take part in contraction when its flags carry either the
        // allow-reassociation or the allow-contract bit.
10440   SDNodeFlags NodeFlags = N->getFlags();
10441   return NodeFlags.hasAllowReassociation() || NodeFlags.hasAllowContract();
10442 }
10443
10444 /// Try to perform FMA combining on a given FADD node.
      ///
      /// Looks for an FMUL feeding either operand of the FADD (directly, through
      /// an FP_EXTEND, or nested in an existing fused node) and, if fusion is
      /// permitted, replaces the pair with a fused multiply-add node. The fused
      /// opcode is FMAD when that is available, otherwise FMA. The FADD's node
      /// flags are passed on to every fused node created here.
      ///
      /// \param N The FADD node to examine.
      /// \returns The replacement fused node, or an empty SDValue when no fold
      ///          applies or fusion is not allowed.
10445 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10446   SDValue N0 = N->getOperand(0);
10447   SDValue N1 = N->getOperand(1);
10448   EVT VT = N->getValueType(0);
10449   SDLoc SL(N);
10450
10451   const TargetOptions &Options = DAG.getTarget().Options;
10452
10453   // Floating-point multiply-add with intermediate rounding.
10454   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10455
10456   // Floating-point multiply-add without intermediate rounding.
10457   bool HasFMA =
10458       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10459       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10460
10461   // No valid opcode, do not combine.
10462   if (!HasFMAD && !HasFMA)
10463     return SDValue();
10464
10465   SDNodeFlags Flags = N->getFlags();
        // CanFuse: this particular FADD may be fused, either because unsafe FP
        // math is enabled or because its own flags allow it.
10466   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
        // AllowFusionGlobally: any FMUL may be treated as contractable without
        // looking at its individual flags.
10467   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10468                               CanFuse || HasFMAD);
10469   // If the addition is not contractable, do not combine.
        // (CanFuse already includes isContractable(N), so the second test is
        // always true whenever AllowFusionGlobally is false.)
10470   if (!AllowFusionGlobally && !isContractable(N))
10471     return SDValue();
10472
        // Leave FMA formation to the machine combiner when the subtarget asks
        // for that at this optimization level.
10473   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10474   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10475     return SDValue();
10476
10477   // Always prefer FMAD to FMA for precision.
10478   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
        // Aggressive mode drops the single-use requirement on the multiply and
        // enables the nested folds further below.
10479   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10480
10481   // Is the node an FMUL and contractable either due to global flags or
10482   // SDNodeFlags.
10483   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10484     if (N.getOpcode() != ISD::FMUL)
10485       return false;
10486     return AllowFusionGlobally || isContractable(N.getNode());
10487   };
10488   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10489   // prefer to fold the multiply with fewer uses.
10490   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10491     if (N0.getNode()->use_size() > N1.getNode()->use_size())
10492       std::swap(N0, N1);
10493   }
10494
10495   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10496   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10497     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10498                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
10499   }
10500
10501   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10502   // Note: Commutes FADD operands.
10503   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10504     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10505                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
10506   }
10507
10508   // Look through FP_EXTEND nodes to do more combining.
10509
10510   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10511   if (N0.getOpcode() == ISD::FP_EXTEND) {
10512     SDValue N00 = N0.getOperand(0);
          // Only fold when the target reports the extension as foldable into
          // the fused opcode for this pair of types.
10513     if (isContractableFMUL(N00) &&
10514         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10515       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10516                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10517                                      N00.getOperand(0)),
10518                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10519                                      N00.getOperand(1)), N1, Flags);
10520     }
10521   }
10522
10523   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10524   // Note: Commutes FADD operands.
10525   if (N1.getOpcode() == ISD::FP_EXTEND) {
10526     SDValue N10 = N1.getOperand(0);
10527     if (isContractableFMUL(N10) &&
10528         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10529       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10530                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10531                                      N10.getOperand(0)),
10532                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10533                                      N10.getOperand(1)), N0, Flags);
10534     }
10535   }
10536
10537   // More folding opportunities when target permits.
10538   if (Aggressive) {
10539     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
          // Requires that both the outer fused node and the inner multiply have
          // a single use.
10540     if (CanFuse &&
10541         N0.getOpcode() == PreferredFusedOpcode &&
10542         N0.getOperand(2).getOpcode() == ISD::FMUL &&
10543         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10544       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10545                          N0.getOperand(0), N0.getOperand(1),
10546                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10547                                      N0.getOperand(2).getOperand(0),
10548                                      N0.getOperand(2).getOperand(1),
10549                                      N1, Flags), Flags);
10550     }
10551
10552     // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
10553     if (CanFuse &&
10554         N1->getOpcode() == PreferredFusedOpcode &&
10555         N1.getOperand(2).getOpcode() == ISD::FMUL &&
10556         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10557       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10558                          N1.getOperand(0), N1.getOperand(1),
10559                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10560                                      N1.getOperand(2).getOperand(0),
10561                                      N1.getOperand(2).getOperand(1),
10562                                      N0, Flags), Flags);
10563     }
10564
10565
10566     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10567     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
          // Helper shared by the N0 and N1 variants of this fold: builds
          // (fma X, Y, (fma (fpext U), (fpext V), Z)).
10568     auto FoldFAddFMAFPExtFMul = [&] (
10569       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10570       SDNodeFlags Flags) {
10571       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10572                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10573                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10574                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10575                                      Z, Flags), Flags);
10576     };
10577     if (N0.getOpcode() == PreferredFusedOpcode) {
10578       SDValue N02 = N0.getOperand(2);
10579       if (N02.getOpcode() == ISD::FP_EXTEND) {
10580         SDValue N020 = N02.getOperand(0);
10581         if (isContractableFMUL(N020) &&
10582             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10583           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10584                                       N020.getOperand(0), N020.getOperand(1),
10585                                       N1, Flags);
10586         }
10587       }
10588     }
10589
10590     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10591     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10592     // FIXME: This turns two single-precision and one double-precision
10593     // operation into two double-precision operations, which might not be
10594     // interesting for all targets, especially GPUs.
          // Helper shared by the N0 and N1 variants of this fold: builds
          // (fma (fpext X), (fpext Y), (fma (fpext U), (fpext V), Z)).
10595     auto FoldFAddFPExtFMAFMul = [&] (
10596       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10597       SDNodeFlags Flags) {
10598       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10599                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10600                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10601                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10602                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10603                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10604                                      Z, Flags), Flags);
10605     };
10606     if (N0.getOpcode() == ISD::FP_EXTEND) {
10607       SDValue N00 = N0.getOperand(0);
10608       if (N00.getOpcode() == PreferredFusedOpcode) {
10609         SDValue N002 = N00.getOperand(2);
10610         if (isContractableFMUL(N002) &&
10611             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10612           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10613                                       N002.getOperand(0), N002.getOperand(1),
10614                                       N1, Flags);
10615         }
10616       }
10617     }
10618
10619     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
10620     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
10621     if (N1.getOpcode() == PreferredFusedOpcode) {
10622       SDValue N12 = N1.getOperand(2);
10623       if (N12.getOpcode() == ISD::FP_EXTEND) {
10624         SDValue N120 = N12.getOperand(0);
10625         if (isContractableFMUL(N120) &&
10626             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10627           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10628                                       N120.getOperand(0), N120.getOperand(1),
10629                                       N0, Flags);
10630         }
10631       }
10632     }
10633
10634     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
10635     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10636     // FIXME: This turns two single-precision and one double-precision
10637     // operation into two double-precision operations, which might not be
10638     // interesting for all targets, especially GPUs.
10639     if (N1.getOpcode() == ISD::FP_EXTEND) {
10640       SDValue N10 = N1.getOperand(0);
10641       if (N10.getOpcode() == PreferredFusedOpcode) {
10642         SDValue N102 = N10.getOperand(2);
10643         if (isContractableFMUL(N102) &&
10644             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10645           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10646                                       N102.getOperand(0), N102.getOperand(1),
10647                                       N0, Flags);
10648         }
10649       }
10650     }
10651   }
10652
        // No fold matched.
10653   return SDValue();
10654 }
10655
10656 /// Try to perform FMA combining on a given FSUB node.
10657 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10658   SDValue N0 = N->getOperand(0);
10659   SDValue N1 = N->getOperand(1);
10660   EVT VT = N->getValueType(0);
10661   SDLoc SL(N);
10662
10663   const TargetOptions &Options = DAG.getTarget().Options;
10664   // Floating-point multiply-add with intermediate rounding.
10665   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10666
10667   // Floating-point multiply-add without intermediate rounding.
10668   bool HasFMA =
10669       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10670       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10671
10672   // No valid opcode, do not combine.
10673   if (!HasFMAD && !HasFMA)
10674     return SDValue();
10675
10676   const SDNodeFlags Flags = N->getFlags();
10677   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10678   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10679                               CanFuse || HasFMAD);
10680
10681   // If the subtraction is not contractable, do not combine.
10682   if (!AllowFusionGlobally && !isContractable(N))
10683     return SDValue();
10684
10685   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10686   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10687     return SDValue();
10688
10689   // Always prefer FMAD to FMA for precision.
10690   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10691   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10692
10693   // Is the node an FMUL and contractable either due to global flags or
10694   // SDNodeFlags.
10695   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10696     if (N.getOpcode() != ISD::FMUL)
10697       return false;
10698     return AllowFusionGlobally || isContractable(N.getNode());
10699   };
10700
10701   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10702   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10703     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10704                        N0.getOperand(0), N0.getOperand(1),
10705                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10706   }
10707
10708   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10709   // Note: Commutes FSUB operands.
10710   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10711     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10712                        DAG.getNode(ISD::FNEG, SL, VT,
10713                                    N1.getOperand(0)),
10714                        N1.getOperand(1), N0, Flags);
10715   }
10716
10717   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10718   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10719       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10720     SDValue N00 = N0.getOperand(0).getOperand(0);
10721     SDValue N01 = N0.getOperand(0).getOperand(1);
10722     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10723                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10724                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10725   }
10726
10727   // Look through FP_EXTEND nodes to do more combining.
10728
10729   // fold (fsub (fpext (fmul x, y)), z)
10730   //   -> (fma (fpext x), (fpext y), (fneg z))
10731   if (N0.getOpcode() == ISD::FP_EXTEND) {
10732     SDValue N00 = N0.getOperand(0);
10733     if (isContractableFMUL(N00) &&
10734         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10735       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10736                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10737                                      N00.getOperand(0)),
10738                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10739                                      N00.getOperand(1)),
10740                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10741     }
10742   }
10743
10744   // fold (fsub x, (fpext (fmul y, z)))
10745   //   -> (fma (fneg (fpext y)), (fpext z), x)
10746   // Note: Commutes FSUB operands.
10747   if (N1.getOpcode() == ISD::FP_EXTEND) {
10748     SDValue N10 = N1.getOperand(0);
10749     if (isContractableFMUL(N10) &&
10750         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10751       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10752                          DAG.getNode(ISD::FNEG, SL, VT,
10753                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
10754                                                  N10.getOperand(0))),
10755                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10756                                      N10.getOperand(1)),
10757                          N0, Flags);
10758     }
10759   }
10760
10761   // fold (fsub (fpext (fneg (fmul, x, y))), z)
10762   //   -> (fneg (fma (fpext x), (fpext y), z))
10763   // Note: This could be removed with appropriate canonicalization of the
10764   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10765   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10766   // from implementing the canonicalization in visitFSUB.
10767   if (N0.getOpcode() == ISD::FP_EXTEND) {
10768     SDValue N00 = N0.getOperand(0);
10769     if (N00.getOpcode() == ISD::FNEG) {
10770       SDValue N000 = N00.getOperand(0);
10771       if (isContractableFMUL(N000) &&
10772           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10773         return DAG.getNode(ISD::FNEG, SL, VT,
10774                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10775                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10776                                                    N000.getOperand(0)),
10777                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10778                                                    N000.getOperand(1)),
10779                                        N1, Flags));
10780       }
10781     }
10782   }
10783
10784   // fold (fsub (fneg (fpext (fmul, x, y))), z)
10785   //   -> (fneg (fma (fpext x)), (fpext y), z)
10786   // Note: This could be removed with appropriate canonicalization of the
10787   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10788   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10789   // from implementing the canonicalization in visitFSUB.
10790   if (N0.getOpcode() == ISD::FNEG) {
10791     SDValue N00 = N0.getOperand(0);
10792     if (N00.getOpcode() == ISD::FP_EXTEND) {
10793       SDValue N000 = N00.getOperand(0);
10794       if (isContractableFMUL(N000) &&
10795           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10796         return DAG.getNode(ISD::FNEG, SL, VT,
10797                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10798                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10799                                                    N000.getOperand(0)),
10800                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10801                                                    N000.getOperand(1)),
10802                                        N1, Flags));
10803       }
10804     }
10805   }
10806
10807   // More folding opportunities when target permits.
10808   if (Aggressive) {
10809     // fold (fsub (fma x, y, (fmul u, v)), z)
10810     //   -> (fma x, y (fma u, v, (fneg z)))
10811     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10812         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10813         N0.getOperand(2)->hasOneUse()) {
10814       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10815                          N0.getOperand(0), N0.getOperand(1),
10816                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10817                                      N0.getOperand(2).getOperand(0),
10818                                      N0.getOperand(2).getOperand(1),
10819                                      DAG.getNode(ISD::FNEG, SL, VT,
10820                                                  N1), Flags), Flags);
10821     }
10822
10823     // fold (fsub x, (fma y, z, (fmul u, v)))
10824     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
10825     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10826         isContractableFMUL(N1.getOperand(2))) {
10827       SDValue N20 = N1.getOperand(2).getOperand(0);
10828       SDValue N21 = N1.getOperand(2).getOperand(1);
10829       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10830                          DAG.getNode(ISD::FNEG, SL, VT,
10831                                      N1.getOperand(0)),
10832                          N1.getOperand(1),
10833                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10834                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
10835                                      N21, N0, Flags), Flags);
10836     }
10837
10838
10839     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10840     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
10841     if (N0.getOpcode() == PreferredFusedOpcode) {
10842       SDValue N02 = N0.getOperand(2);
10843       if (N02.getOpcode() == ISD::FP_EXTEND) {
10844         SDValue N020 = N02.getOperand(0);
10845         if (isContractableFMUL(N020) &&
10846             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10847           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10848                              N0.getOperand(0), N0.getOperand(1),
10849                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10850                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10851                                                      N020.getOperand(0)),
10852                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10853                                                      N020.getOperand(1)),
10854                                          DAG.getNode(ISD::FNEG, SL, VT,
10855                                                      N1), Flags), Flags);
10856         }
10857       }
10858     }
10859
10860     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10861     //   -> (fma (fpext x), (fpext y),
10862     //           (fma (fpext u), (fpext v), (fneg z)))
10863     // FIXME: This turns two single-precision and one double-precision
10864     // operation into two double-precision operations, which might not be
10865     // interesting for all targets, especially GPUs.
10866     if (N0.getOpcode() == ISD::FP_EXTEND) {
10867       SDValue N00 = N0.getOperand(0);
10868       if (N00.getOpcode() == PreferredFusedOpcode) {
10869         SDValue N002 = N00.getOperand(2);
10870         if (isContractableFMUL(N002) &&
10871             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10872           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10873                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10874                                          N00.getOperand(0)),
10875                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10876                                          N00.getOperand(1)),
10877                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10878                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10879                                                      N002.getOperand(0)),
10880                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10881                                                      N002.getOperand(1)),
10882                                          DAG.getNode(ISD::FNEG, SL, VT,
10883                                                      N1), Flags), Flags);
10884         }
10885       }
10886     }
10887
10888     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10889     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10890     if (N1.getOpcode() == PreferredFusedOpcode &&
10891         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10892       SDValue N120 = N1.getOperand(2).getOperand(0);
10893       if (isContractableFMUL(N120) &&
10894           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10895         SDValue N1200 = N120.getOperand(0);
10896         SDValue N1201 = N120.getOperand(1);
10897         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10898                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10899                            N1.getOperand(1),
10900                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10901                                        DAG.getNode(ISD::FNEG, SL, VT,
10902                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10903                                                                VT, N1200)),
10904                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10905                                                    N1201),
10906                                        N0, Flags), Flags);
10907       }
10908     }
10909
10910     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10911     //   -> (fma (fneg (fpext y)), (fpext z),
10912     //           (fma (fneg (fpext u)), (fpext v), x))
10913     // FIXME: This turns two single-precision and one double-precision
10914     // operation into two double-precision operations, which might not be
10915     // interesting for all targets, especially GPUs.
10916     if (N1.getOpcode() == ISD::FP_EXTEND &&
10917         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10918       SDValue CvtSrc = N1.getOperand(0);
10919       SDValue N100 = CvtSrc.getOperand(0);
10920       SDValue N101 = CvtSrc.getOperand(1);
10921       SDValue N102 = CvtSrc.getOperand(2);
10922       if (isContractableFMUL(N102) &&
10923           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10924         SDValue N1020 = N102.getOperand(0);
10925         SDValue N1021 = N102.getOperand(1);
10926         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10927                            DAG.getNode(ISD::FNEG, SL, VT,
10928                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10929                                                    N100)),
10930                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10931                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10932                                        DAG.getNode(ISD::FNEG, SL, VT,
10933                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10934                                                                VT, N1020)),
10935                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10936                                                    N1021),
10937                                        N0, Flags), Flags);
10938       }
10939     }
10940   }
10941
10942   return SDValue();
10943 }
10944
10945 /// Try to perform FMA combining on a given FMUL node based on the distributive
10946 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10947 /// subtraction instead of addition).
10948 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10949   SDValue N0 = N->getOperand(0);
10950   SDValue N1 = N->getOperand(1);
10951   EVT VT = N->getValueType(0);
10952   SDLoc SL(N);
10953   const SDNodeFlags Flags = N->getFlags();
10954
10955   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10956
10957   const TargetOptions &Options = DAG.getTarget().Options;
10958
10959   // The transforms below are incorrect when x == 0 and y == inf, because the
10960   // intermediate multiplication produces a nan.
10961   if (!Options.NoInfsFPMath)
10962     return SDValue();
10963
10964   // Floating-point multiply-add without intermediate rounding.
10965   bool HasFMA =
10966       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10967       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10968       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10969
10970   // Floating-point multiply-add with intermediate rounding. This can result
10971   // in a less precise result due to the changed rounding order.
10972   bool HasFMAD = Options.UnsafeFPMath &&
10973                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10974
10975   // No valid opcode, do not combine.
10976   if (!HasFMAD && !HasFMA)
10977     return SDValue();
10978
10979   // Always prefer FMAD to FMA for precision.
10980   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10981   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10982
10983   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
10984   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
10985   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10986     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10987       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
10988         if (C->isExactlyValue(+1.0))
10989           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10990                              Y, Flags);
10991         if (C->isExactlyValue(-1.0))
10992           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10993                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10994       }
10995     }
10996     return SDValue();
10997   };
10998
10999   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11000     return FMA;
11001   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11002     return FMA;
11003
11004   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11005   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11006   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11007   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11008   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11009     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11010       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11011         if (C0->isExactlyValue(+1.0))
11012           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11013                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11014                              Y, Flags);
11015         if (C0->isExactlyValue(-1.0))
11016           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11017                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11018                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11019       }
11020       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11021         if (C1->isExactlyValue(+1.0))
11022           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11023                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11024         if (C1->isExactlyValue(-1.0))
11025           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11026                              Y, Flags);
11027       }
11028     }
11029     return SDValue();
11030   };
11031
11032   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11033     return FMA;
11034   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11035     return FMA;
11036
11037   return SDValue();
11038 }
11039
11040 SDValue DAGCombiner::visitFADD(SDNode *N) {
11041   SDValue N0 = N->getOperand(0);
11042   SDValue N1 = N->getOperand(1);
11043   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11044   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11045   EVT VT = N->getValueType(0);
11046   SDLoc DL(N);
11047   const TargetOptions &Options = DAG.getTarget().Options;
11048   const SDNodeFlags Flags = N->getFlags();
11049
11050   // fold vector ops
11051   if (VT.isVector())
11052     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11053       return FoldedVOp;
11054
11055   // fold (fadd c1, c2) -> c1 + c2
11056   if (N0CFP && N1CFP)
11057     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11058
11059   // canonicalize constant to RHS
11060   if (N0CFP && !N1CFP)
11061     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11062
11063   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11064   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11065   if (N1C && N1C->isZero())
11066     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11067       return N0;
11068
11069   if (SDValue NewSel = foldBinOpIntoSelect(N))
11070     return NewSel;
11071
11072   // fold (fadd A, (fneg B)) -> (fsub A, B)
11073   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11074       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
11075     return DAG.getNode(ISD::FSUB, DL, VT, N0,
11076                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11077
11078   // fold (fadd (fneg A), B) -> (fsub B, A)
11079   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11080       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
11081     return DAG.getNode(ISD::FSUB, DL, VT, N1,
11082                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
11083
11084   auto isFMulNegTwo = [](SDValue FMul) {
11085     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11086       return false;
11087     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11088     return C && C->isExactlyValue(-2.0);
11089   };
11090
11091   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11092   if (isFMulNegTwo(N0)) {
11093     SDValue B = N0.getOperand(0);
11094     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11095     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11096   }
11097   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11098   if (isFMulNegTwo(N1)) {
11099     SDValue B = N1.getOperand(0);
11100     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11101     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11102   }
11103
11104   // No FP constant should be created after legalization as Instruction
11105   // Selection pass has a hard time dealing with FP constants.
11106   bool AllowNewConst = (Level < AfterLegalizeDAG);
11107
11108   // If 'unsafe math' or nnan is enabled, fold lots of things.
11109   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11110     // If allowed, fold (fadd (fneg x), x) -> 0.0
11111     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11112       return DAG.getConstantFP(0.0, DL, VT);
11113
11114     // If allowed, fold (fadd x, (fneg x)) -> 0.0
11115     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11116       return DAG.getConstantFP(0.0, DL, VT);
11117   }
11118
11119   // If 'unsafe math' or reassoc and nsz, fold lots of things.
11120   // TODO: break out portions of the transformations below for which Unsafe is
11121   //       considered and which do not require both nsz and reassoc
11122   if ((Options.UnsafeFPMath ||
11123        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11124       AllowNewConst) {
11125     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11126     if (N1CFP && N0.getOpcode() == ISD::FADD &&
11127         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11128       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11129       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11130     }
11131
11132     // We can fold chains of FADD's of the same value into multiplications.
11133     // This transform is not safe in general because we are reducing the number
11134     // of rounding steps.
11135     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11136       if (N0.getOpcode() == ISD::FMUL) {
11137         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11138         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11139
11140         // (fadd (fmul x, c), x) -> (fmul x, c+1)
11141         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11142           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11143                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11144           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11145         }
11146
11147         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11148         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11149             N1.getOperand(0) == N1.getOperand(1) &&
11150             N0.getOperand(0) == N1.getOperand(0)) {
11151           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11152                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11153           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11154         }
11155       }
11156
11157       if (N1.getOpcode() == ISD::FMUL) {
11158         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11159         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11160
11161         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11162         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11163           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11164                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11165           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11166         }
11167
11168         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11169         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11170             N0.getOperand(0) == N0.getOperand(1) &&
11171             N1.getOperand(0) == N0.getOperand(0)) {
11172           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11173                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11174           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11175         }
11176       }
11177
11178       if (N0.getOpcode() == ISD::FADD) {
11179         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11180         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11181         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11182             (N0.getOperand(0) == N1)) {
11183           return DAG.getNode(ISD::FMUL, DL, VT,
11184                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11185         }
11186       }
11187
11188       if (N1.getOpcode() == ISD::FADD) {
11189         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11190         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11191         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11192             N1.getOperand(0) == N0) {
11193           return DAG.getNode(ISD::FMUL, DL, VT,
11194                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11195         }
11196       }
11197
11198       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11199       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11200           N0.getOperand(0) == N0.getOperand(1) &&
11201           N1.getOperand(0) == N1.getOperand(1) &&
11202           N0.getOperand(0) == N1.getOperand(0)) {
11203         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11204                            DAG.getConstantFP(4.0, DL, VT), Flags);
11205       }
11206     }
11207   } // enable-unsafe-fp-math
11208
11209   // FADD -> FMA combines:
11210   if (SDValue Fused = visitFADDForFMACombine(N)) {
11211     AddToWorklist(Fused.getNode());
11212     return Fused;
11213   }
11214   return SDValue();
11215 }
11216
11217 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11218   SDValue N0 = N->getOperand(0);
11219   SDValue N1 = N->getOperand(1);
11220   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11221   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11222   EVT VT = N->getValueType(0);
11223   SDLoc DL(N);
11224   const TargetOptions &Options = DAG.getTarget().Options;
11225   const SDNodeFlags Flags = N->getFlags();
11226
11227   // fold vector ops
11228   if (VT.isVector())
11229     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11230       return FoldedVOp;
11231
11232   // fold (fsub c1, c2) -> c1-c2
11233   if (N0CFP && N1CFP)
11234     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11235
11236   if (SDValue NewSel = foldBinOpIntoSelect(N))
11237     return NewSel;
11238
11239   // (fsub A, 0) -> A
11240   if (N1CFP && N1CFP->isZero()) {
11241     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11242         Flags.hasNoSignedZeros()) {
11243       return N0;
11244     }
11245   }
11246
11247   if (N0 == N1) {
11248     // (fsub x, x) -> 0.0
11249     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11250       return DAG.getConstantFP(0.0f, DL, VT);
11251   }
11252
11253   // (fsub -0.0, N1) -> -N1
11254   if (N0CFP && N0CFP->isZero()) {
11255     if (N0CFP->isNegative() ||
11256         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11257       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11258         return GetNegatedExpression(N1, DAG, LegalOperations);
11259       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11260         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11261     }
11262   }
11263
11264   if ((Options.UnsafeFPMath ||
11265       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11266       && N1.getOpcode() == ISD::FADD) {
11267     // X - (X + Y) -> -Y
11268     if (N0 == N1->getOperand(0))
11269       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11270     // X - (Y + X) -> -Y
11271     if (N0 == N1->getOperand(1))
11272       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11273   }
11274
11275   // fold (fsub A, (fneg B)) -> (fadd A, B)
11276   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11277     return DAG.getNode(ISD::FADD, DL, VT, N0,
11278                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11279
11280   // FSUB -> FMA combines:
11281   if (SDValue Fused = visitFSUBForFMACombine(N)) {
11282     AddToWorklist(Fused.getNode());
11283     return Fused;
11284   }
11285
11286   return SDValue();
11287 }
11288
11289 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11290   SDValue N0 = N->getOperand(0);
11291   SDValue N1 = N->getOperand(1);
11292   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11293   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11294   EVT VT = N->getValueType(0);
11295   SDLoc DL(N);
11296   const TargetOptions &Options = DAG.getTarget().Options;
11297   const SDNodeFlags Flags = N->getFlags();
11298
11299   // fold vector ops
11300   if (VT.isVector()) {
11301     // This just handles C1 * C2 for vectors. Other vector folds are below.
11302     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11303       return FoldedVOp;
11304   }
11305
11306   // fold (fmul c1, c2) -> c1*c2
11307   if (N0CFP && N1CFP)
11308     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11309
11310   // canonicalize constant to RHS
11311   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11312      !isConstantFPBuildVectorOrConstantFP(N1))
11313     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11314
11315   // fold (fmul A, 1.0) -> A
11316   if (N1CFP && N1CFP->isExactlyValue(1.0))
11317     return N0;
11318
11319   if (SDValue NewSel = foldBinOpIntoSelect(N))
11320     return NewSel;
11321
11322   if (Options.UnsafeFPMath ||
11323       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11324     // fold (fmul A, 0) -> 0
11325     if (N1CFP && N1CFP->isZero())
11326       return N1;
11327   }
11328
11329   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11330     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11331     if (isConstantFPBuildVectorOrConstantFP(N1) &&
11332         N0.getOpcode() == ISD::FMUL) {
11333       SDValue N00 = N0.getOperand(0);
11334       SDValue N01 = N0.getOperand(1);
11335       // Avoid an infinite loop by making sure that N00 is not a constant
11336       // (the inner multiply has not been constant folded yet).
11337       if (isConstantFPBuildVectorOrConstantFP(N01) &&
11338           !isConstantFPBuildVectorOrConstantFP(N00)) {
11339         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11340         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11341       }
11342     }
11343
11344     // Match a special-case: we convert X * 2.0 into fadd.
11345     // fmul (fadd X, X), C -> fmul X, 2.0 * C
11346     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11347         N0.getOperand(0) == N0.getOperand(1)) {
11348       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11349       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11350       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11351     }
11352   }
11353
11354   // fold (fmul X, 2.0) -> (fadd X, X)
11355   if (N1CFP && N1CFP->isExactlyValue(+2.0))
11356     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11357
11358   // fold (fmul X, -1.0) -> (fneg X)
11359   if (N1CFP && N1CFP->isExactlyValue(-1.0))
11360     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11361       return DAG.getNode(ISD::FNEG, DL, VT, N0);
11362
11363   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11364   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11365     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11366       // Both can be negated for free, check to see if at least one is cheaper
11367       // negated.
11368       if (LHSNeg == 2 || RHSNeg == 2)
11369         return DAG.getNode(ISD::FMUL, DL, VT,
11370                            GetNegatedExpression(N0, DAG, LegalOperations),
11371                            GetNegatedExpression(N1, DAG, LegalOperations),
11372                            Flags);
11373     }
11374   }
11375
11376   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11377   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11378   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11379       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11380       TLI.isOperationLegal(ISD::FABS, VT)) {
11381     SDValue Select = N0, X = N1;
11382     if (Select.getOpcode() != ISD::SELECT)
11383       std::swap(Select, X);
11384
11385     SDValue Cond = Select.getOperand(0);
11386     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11387     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11388
11389     if (TrueOpnd && FalseOpnd &&
11390         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11391         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11392         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11393       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11394       switch (CC) {
11395       default: break;
11396       case ISD::SETOLT:
11397       case ISD::SETULT:
11398       case ISD::SETOLE:
11399       case ISD::SETULE:
11400       case ISD::SETLT:
11401       case ISD::SETLE:
11402         std::swap(TrueOpnd, FalseOpnd);
11403         LLVM_FALLTHROUGH;
11404       case ISD::SETOGT:
11405       case ISD::SETUGT:
11406       case ISD::SETOGE:
11407       case ISD::SETUGE:
11408       case ISD::SETGT:
11409       case ISD::SETGE:
11410         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11411             TLI.isOperationLegal(ISD::FNEG, VT))
11412           return DAG.getNode(ISD::FNEG, DL, VT,
11413                    DAG.getNode(ISD::FABS, DL, VT, X));
11414         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11415           return DAG.getNode(ISD::FABS, DL, VT, X);
11416
11417         break;
11418       }
11419     }
11420   }
11421
11422   // FMUL -> FMA combines:
11423   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11424     AddToWorklist(Fused.getNode());
11425     return Fused;
11426   }
11427
11428   return SDValue();
11429 }
11430
11431 SDValue DAGCombiner::visitFMA(SDNode *N) {
11432   SDValue N0 = N->getOperand(0);
11433   SDValue N1 = N->getOperand(1);
11434   SDValue N2 = N->getOperand(2);
11435   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11436   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11437   EVT VT = N->getValueType(0);
11438   SDLoc DL(N);
11439   const TargetOptions &Options = DAG.getTarget().Options;
11440
11441   // FMA nodes have flags that propagate to the created nodes.
11442   const SDNodeFlags Flags = N->getFlags();
11443   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11444
11445   // Constant fold FMA.
11446   if (isa<ConstantFPSDNode>(N0) &&
11447       isa<ConstantFPSDNode>(N1) &&
11448       isa<ConstantFPSDNode>(N2)) {
11449     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11450   }
11451
11452   if (UnsafeFPMath) {
11453     if (N0CFP && N0CFP->isZero())
11454       return N2;
11455     if (N1CFP && N1CFP->isZero())
11456       return N2;
11457   }
11458   // TODO: The FMA node should have flags that propagate to these nodes.
11459   if (N0CFP && N0CFP->isExactlyValue(1.0))
11460     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11461   if (N1CFP && N1CFP->isExactlyValue(1.0))
11462     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11463
11464   // Canonicalize (fma c, x, y) -> (fma x, c, y)
11465   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11466      !isConstantFPBuildVectorOrConstantFP(N1))
11467     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11468
11469   if (UnsafeFPMath) {
11470     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11471     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11472         isConstantFPBuildVectorOrConstantFP(N1) &&
11473         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11474       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11475                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11476                                      Flags), Flags);
11477     }
11478
11479     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11480     if (N0.getOpcode() == ISD::FMUL &&
11481         isConstantFPBuildVectorOrConstantFP(N1) &&
11482         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11483       return DAG.getNode(ISD::FMA, DL, VT,
11484                          N0.getOperand(0),
11485                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11486                                      Flags),
11487                          N2);
11488     }
11489   }
11490
11491   // (fma x, 1, y) -> (fadd x, y)
11492   // (fma x, -1, y) -> (fadd (fneg x), y)
11493   if (N1CFP) {
11494     if (N1CFP->isExactlyValue(1.0))
11495       // TODO: The FMA node should have flags that propagate to this node.
11496       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11497
11498     if (N1CFP->isExactlyValue(-1.0) &&
11499         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11500       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11501       AddToWorklist(RHSNeg.getNode());
11502       // TODO: The FMA node should have flags that propagate to this node.
11503       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11504     }
11505
11506     // fma (fneg x), K, y -> fma x -K, y
11507     if (N0.getOpcode() == ISD::FNEG &&
11508         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11509          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11510       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11511                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11512     }
11513   }
11514
11515   if (UnsafeFPMath) {
11516     // (fma x, c, x) -> (fmul x, (c+1))
11517     if (N1CFP && N0 == N2) {
11518       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11519                          DAG.getNode(ISD::FADD, DL, VT, N1,
11520                                      DAG.getConstantFP(1.0, DL, VT), Flags),
11521                          Flags);
11522     }
11523
11524     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11525     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11526       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11527                          DAG.getNode(ISD::FADD, DL, VT, N1,
11528                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
11529                          Flags);
11530     }
11531   }
11532
11533   return SDValue();
11534 }
11535
11536 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11537 // reciprocal.
11538 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11539 // Notice that this is not always beneficial. One reason is different targets
11540 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11541 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11542 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11543 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11544   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11545   const SDNodeFlags Flags = N->getFlags();
11546   if (!UnsafeMath && !Flags.hasAllowReciprocal())
11547     return SDValue();
11548
11549   // Skip if current node is a reciprocal.
11550   SDValue N0 = N->getOperand(0);
11551   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11552   if (N0CFP && N0CFP->isExactlyValue(1.0))
11553     return SDValue();
11554
11555   // Exit early if the target does not want this transform or if there can't
11556   // possibly be enough uses of the divisor to make the transform worthwhile.
11557   SDValue N1 = N->getOperand(1);
11558   unsigned MinUses = TLI.combineRepeatedFPDivisors();
11559   if (!MinUses || N1->use_size() < MinUses)
11560     return SDValue();
11561
11562   // Find all FDIV users of the same divisor.
11563   // Use a set because duplicates may be present in the user list.
11564   SetVector<SDNode *> Users;
11565   for (auto *U : N1->uses()) {
11566     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11567       // This division is eligible for optimization only if global unsafe math
11568       // is enabled or if this division allows reciprocal formation.
11569       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11570         Users.insert(U);
11571     }
11572   }
11573
11574   // Now that we have the actual number of divisor uses, make sure it meets
11575   // the minimum threshold specified by the target.
11576   if (Users.size() < MinUses)
11577     return SDValue();
11578
11579   EVT VT = N->getValueType(0);
11580   SDLoc DL(N);
11581   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11582   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11583
11584   // Dividend / Divisor -> Dividend * Reciprocal
11585   for (auto *U : Users) {
11586     SDValue Dividend = U->getOperand(0);
11587     if (Dividend != FPOne) {
11588       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11589                                     Reciprocal, Flags);
11590       CombineTo(U, NewNode);
11591     } else if (U != Reciprocal.getNode()) {
11592       // In the absence of fast-math-flags, this user node is always the
11593       // same node as Reciprocal, but with FMF they may be different nodes.
11594       CombineTo(U, Reciprocal);
11595     }
11596   }
11597   return SDValue(N, 0);  // N was replaced.
11598 }
11599
11600 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11601   SDValue N0 = N->getOperand(0);
11602   SDValue N1 = N->getOperand(1);
11603   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11604   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11605   EVT VT = N->getValueType(0);
11606   SDLoc DL(N);
11607   const TargetOptions &Options = DAG.getTarget().Options;
11608   SDNodeFlags Flags = N->getFlags();
11609
11610   // fold vector ops
11611   if (VT.isVector())
11612     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11613       return FoldedVOp;
11614
11615   // fold (fdiv c1, c2) -> c1/c2
11616   if (N0CFP && N1CFP)
11617     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11618
11619   if (SDValue NewSel = foldBinOpIntoSelect(N))
11620     return NewSel;
11621
11622   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11623     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11624     if (N1CFP) {
11625       // Compute the reciprocal 1.0 / c2.
11626       const APFloat &N1APF = N1CFP->getValueAPF();
11627       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11628       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11629       // Only do the transform if the reciprocal is a legal fp immediate that
11630       // isn't too nasty (eg NaN, denormal, ...).
11631       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11632           (!LegalOperations ||
11633            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11634            // backend)... we should handle this gracefully after Legalize.
11635            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11636            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11637            TLI.isFPImmLegal(Recip, VT)))
11638         return DAG.getNode(ISD::FMUL, DL, VT, N0,
11639                            DAG.getConstantFP(Recip, DL, VT), Flags);
11640     }
11641
11642     // If this FDIV is part of a reciprocal square root, it may be folded
11643     // into a target-specific square root estimate instruction.
11644     if (N1.getOpcode() == ISD::FSQRT) {
11645       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11646         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11647       }
11648     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11649                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11650       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11651                                           Flags)) {
11652         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11653         AddToWorklist(RV.getNode());
11654         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11655       }
11656     } else if (N1.getOpcode() == ISD::FP_ROUND &&
11657                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11658       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11659                                           Flags)) {
11660         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11661         AddToWorklist(RV.getNode());
11662         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11663       }
11664     } else if (N1.getOpcode() == ISD::FMUL) {
11665       // Look through an FMUL. Even though this won't remove the FDIV directly,
11666       // it's still worthwhile to get rid of the FSQRT if possible.
11667       SDValue SqrtOp;
11668       SDValue OtherOp;
11669       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11670         SqrtOp = N1.getOperand(0);
11671         OtherOp = N1.getOperand(1);
11672       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11673         SqrtOp = N1.getOperand(1);
11674         OtherOp = N1.getOperand(0);
11675       }
11676       if (SqrtOp.getNode()) {
11677         // We found a FSQRT, so try to make this fold:
11678         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11679         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11680           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11681           AddToWorklist(RV.getNode());
11682           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11683         }
11684       }
11685     }
11686
11687     // Fold into a reciprocal estimate and multiply instead of a real divide.
11688     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11689       AddToWorklist(RV.getNode());
11690       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11691     }
11692   }
11693
11694   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11695   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11696     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11697       // Both can be negated for free, check to see if at least one is cheaper
11698       // negated.
11699       if (LHSNeg == 2 || RHSNeg == 2)
11700         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11701                            GetNegatedExpression(N0, DAG, LegalOperations),
11702                            GetNegatedExpression(N1, DAG, LegalOperations),
11703                            Flags);
11704     }
11705   }
11706
11707   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11708     return CombineRepeatedDivisors;
11709
11710   return SDValue();
11711 }
11712
11713 SDValue DAGCombiner::visitFREM(SDNode *N) {
11714   SDValue N0 = N->getOperand(0);
11715   SDValue N1 = N->getOperand(1);
11716   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11717   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11718   EVT VT = N->getValueType(0);
11719
11720   // fold (frem c1, c2) -> fmod(c1,c2)
11721   if (N0CFP && N1CFP)
11722     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11723
11724   if (SDValue NewSel = foldBinOpIntoSelect(N))
11725     return NewSel;
11726
11727   return SDValue();
11728 }
11729
11730 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11731   SDNodeFlags Flags = N->getFlags();
11732   if (!DAG.getTarget().Options.UnsafeFPMath &&
11733       !Flags.hasApproximateFuncs())
11734     return SDValue();
11735
11736   SDValue N0 = N->getOperand(0);
11737   if (TLI.isFsqrtCheap(N0, DAG))
11738     return SDValue();
11739
11740   // FSQRT nodes have flags that propagate to the created nodes.
11741   return buildSqrtEstimate(N0, Flags);
11742 }
11743
11744 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11745 /// copysign(x, fp_round(y)) -> copysign(x, y)
11746 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11747   SDValue N1 = N->getOperand(1);
11748   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11749        N1.getOpcode() == ISD::FP_ROUND)) {
11750     // Do not optimize out type conversion of f128 type yet.
11751     // For some targets like x86_64, configuration is changed to keep one f128
11752     // value in one SSE register, but instruction selection cannot handle
11753     // FCOPYSIGN on SSE registers yet.
11754     EVT N1VT = N1->getValueType(0);
11755     EVT N1Op0VT = N1->getOperand(0).getValueType();
11756     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11757   }
11758   return false;
11759 }
11760
11761 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11762   SDValue N0 = N->getOperand(0);
11763   SDValue N1 = N->getOperand(1);
11764   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11765   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11766   EVT VT = N->getValueType(0);
11767
11768   if (N0CFP && N1CFP) // Constant fold
11769     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11770
11771   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
11772     const APFloat &V = N1C->getValueAPF();
11773     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11774     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11775     if (!V.isNegative()) {
11776       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11777         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11778     } else {
11779       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11780         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11781                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11782     }
11783   }
11784
11785   // copysign(fabs(x), y) -> copysign(x, y)
11786   // copysign(fneg(x), y) -> copysign(x, y)
11787   // copysign(copysign(x,z), y) -> copysign(x, y)
11788   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11789       N0.getOpcode() == ISD::FCOPYSIGN)
11790     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11791
11792   // copysign(x, abs(y)) -> abs(x)
11793   if (N1.getOpcode() == ISD::FABS)
11794     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11795
11796   // copysign(x, copysign(y,z)) -> copysign(x, z)
11797   if (N1.getOpcode() == ISD::FCOPYSIGN)
11798     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11799
11800   // copysign(x, fp_extend(y)) -> copysign(x, y)
11801   // copysign(x, fp_round(y)) -> copysign(x, y)
11802   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11803     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11804
11805   return SDValue();
11806 }
11807
11808 SDValue DAGCombiner::visitFPOW(SDNode *N) {
11809   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
11810   if (!ExponentC)
11811     return SDValue();
11812
11813   // Try to convert x ** (1/3) into cube root.
11814   // TODO: Handle the various flavors of long double.
11815   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
11816   //       Some range near 1/3 should be fine.
11817   EVT VT = N->getValueType(0);
11818   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
11819       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
11820     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
11821     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
11822     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
11823     // For regular numbers, rounding may cause the results to differ.
11824     // Therefore, we require { nsz ninf nnan afn } for this transform.
11825     // TODO: We could select out the special cases if we don't have nsz/ninf.
11826     SDNodeFlags Flags = N->getFlags();
11827     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
11828         !Flags.hasApproximateFuncs())
11829       return SDValue();
11830
11831     // Do not create a cbrt() libcall if the target does not have it, and do not
11832     // turn a pow that has lowering support into a cbrt() libcall.
11833     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
11834         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
11835          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
11836       return SDValue();
11837
11838     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
11839   }
11840
11841   // Try to convert x ** (1/4) into square roots.
11842   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
11843   // TODO: This could be extended (using a target hook) to handle smaller
11844   // power-of-2 fractional exponents.
11845   if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
11846     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
11847     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
11848     // For regular numbers, rounding may cause the results to differ.
11849     // Therefore, we require { nsz ninf afn } for this transform.
11850     // TODO: We could select out the special cases if we don't have nsz/ninf.
11851     SDNodeFlags Flags = N->getFlags();
11852     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
11853         !Flags.hasApproximateFuncs())
11854       return SDValue();
11855
11856     // Don't double the number of libcalls. We are trying to inline fast code.
11857     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
11858       return SDValue();
11859
11860     // Assume that libcalls are the smallest code.
11861     // TODO: This restriction should probably be lifted for vectors.
11862     if (DAG.getMachineFunction().getFunction().optForSize())
11863       return SDValue();
11864
11865     // pow(X, 0.25) --> sqrt(sqrt(X))
11866     SDLoc DL(N);
11867     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
11868     return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
11869   }
11870
11871   return SDValue();
11872 }
11873
11874 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11875                                const TargetLowering &TLI) {
11876   // This optimization is guarded by a function attribute because it may produce
11877   // unexpected results. Ie, programs may be relying on the platform-specific
11878   // undefined behavior when the float-to-int conversion overflows.
11879   const Function &F = DAG.getMachineFunction().getFunction();
11880   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11881   if (StrictOverflow.getValueAsString().equals("false"))
11882     return SDValue();
11883
11884   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11885   // replacing casts with a libcall. We also must be allowed to ignore -0.0
11886   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11887   // conversions would return +0.0.
11888   // FIXME: We should be able to use node-level FMF here.
11889   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11890   EVT VT = N->getValueType(0);
11891   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11892       !DAG.getTarget().Options.NoSignedZerosFPMath)
11893     return SDValue();
11894
11895   // fptosi/fptoui round towards zero, so converting from FP to integer and
11896   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11897   SDValue N0 = N->getOperand(0);
11898   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11899       N0.getOperand(0).getValueType() == VT)
11900     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11901
11902   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11903       N0.getOperand(0).getValueType() == VT)
11904     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11905
11906   return SDValue();
11907 }
11908
11909 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
11910   SDValue N0 = N->getOperand(0);
11911   EVT VT = N->getValueType(0);
11912   EVT OpVT = N0.getValueType();
11913
11914   // fold (sint_to_fp c1) -> c1fp
11915   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11916       // ...but only if the target supports immediate floating-point values
11917       (!LegalOperations ||
11918        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11919     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11920
11921   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
11922   // but UINT_TO_FP is legal on this target, try to convert.
11923   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
11924       hasOperation(ISD::UINT_TO_FP, OpVT)) {
11925     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
11926     if (DAG.SignBitIsZero(N0))
11927       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11928   }
11929
11930   // The next optimizations are desirable only if SELECT_CC can be lowered.
11931   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11932     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
11933     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
11934         !VT.isVector() &&
11935         (!LegalOperations ||
11936          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11937       SDLoc DL(N);
11938       SDValue Ops[] =
11939         { N0.getOperand(0), N0.getOperand(1),
11940           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11941           N0.getOperand(2) };
11942       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11943     }
11944
11945     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
11946     //      (select_cc x, y, 1.0, 0.0,, cc)
11947     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
11948         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
11949         (!LegalOperations ||
11950          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11951       SDLoc DL(N);
11952       SDValue Ops[] =
11953         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
11954           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11955           N0.getOperand(0).getOperand(2) };
11956       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11957     }
11958   }
11959
11960   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11961     return FTrunc;
11962
11963   return SDValue();
11964 }
11965
11966 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
11967   SDValue N0 = N->getOperand(0);
11968   EVT VT = N->getValueType(0);
11969   EVT OpVT = N0.getValueType();
11970
11971   // fold (uint_to_fp c1) -> c1fp
11972   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11973       // ...but only if the target supports immediate floating-point values
11974       (!LegalOperations ||
11975        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11976     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11977
11978   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
11979   // but SINT_TO_FP is legal on this target, try to convert.
11980   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
11981       hasOperation(ISD::SINT_TO_FP, OpVT)) {
11982     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
11983     if (DAG.SignBitIsZero(N0))
11984       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11985   }
11986
11987   // The next optimizations are desirable only if SELECT_CC can be lowered.
11988   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11989     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
11990     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
11991         (!LegalOperations ||
11992          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11993       SDLoc DL(N);
11994       SDValue Ops[] =
11995         { N0.getOperand(0), N0.getOperand(1),
11996           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11997           N0.getOperand(2) };
11998       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11999     }
12000   }
12001
12002   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12003     return FTrunc;
12004
12005   return SDValue();
12006 }
12007
12008 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12009 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12010   SDValue N0 = N->getOperand(0);
12011   EVT VT = N->getValueType(0);
12012
12013   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12014     return SDValue();
12015
12016   SDValue Src = N0.getOperand(0);
12017   EVT SrcVT = Src.getValueType();
12018   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12019   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12020
12021   // We can safely assume the conversion won't overflow the output range,
12022   // because (for example) (uint8_t)18293.f is undefined behavior.
12023
12024   // Since we can assume the conversion won't overflow, our decision as to
12025   // whether the input will fit in the float should depend on the minimum
12026   // of the input range and output range.
12027
12028   // This means this is also safe for a signed input and unsigned output, since
12029   // a negative input would lead to undefined behavior.
12030   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12031   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12032   unsigned ActualSize = std::min(InputSize, OutputSize);
12033   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12034
12035   // We can only fold away the float conversion if the input range can be
12036   // represented exactly in the float range.
12037   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12038     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12039       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12040                                                        : ISD::ZERO_EXTEND;
12041       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12042     }
12043     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12044       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12045     return DAG.getBitcast(VT, Src);
12046   }
12047   return SDValue();
12048 }
12049
12050 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12051   SDValue N0 = N->getOperand(0);
12052   EVT VT = N->getValueType(0);
12053
12054   // fold (fp_to_sint c1fp) -> c1
12055   if (isConstantFPBuildVectorOrConstantFP(N0))
12056     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12057
12058   return FoldIntToFPToInt(N, DAG);
12059 }
12060
12061 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12062   SDValue N0 = N->getOperand(0);
12063   EVT VT = N->getValueType(0);
12064
12065   // fold (fp_to_uint c1fp) -> c1
12066   if (isConstantFPBuildVectorOrConstantFP(N0))
12067     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12068
12069   return FoldIntToFPToInt(N, DAG);
12070 }
12071
12072 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12073   SDValue N0 = N->getOperand(0);
12074   SDValue N1 = N->getOperand(1);
12075   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12076   EVT VT = N->getValueType(0);
12077
12078   // fold (fp_round c1fp) -> c1fp
12079   if (N0CFP)
12080     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12081
12082   // fold (fp_round (fp_extend x)) -> x
12083   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12084     return N0.getOperand(0);
12085
12086   // fold (fp_round (fp_round x)) -> (fp_round x)
12087   if (N0.getOpcode() == ISD::FP_ROUND) {
12088     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12089     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12090
12091     // Skip this folding if it results in an fp_round from f80 to f16.
12092     //
12093     // f80 to f16 always generates an expensive (and as yet, unimplemented)
12094     // libcall to __truncxfhf2 instead of selecting native f16 conversion
12095     // instructions from f32 or f64.  Moreover, the first (value-preserving)
12096     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12097     // x86.
12098     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12099       return SDValue();
12100
12101     // If the first fp_round isn't a value preserving truncation, it might
12102     // introduce a tie in the second fp_round, that wouldn't occur in the
12103     // single-step fp_round we want to fold to.
12104     // In other words, double rounding isn't the same as rounding.
12105     // Also, this is a value preserving truncation iff both fp_round's are.
12106     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12107       SDLoc DL(N);
12108       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12109                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12110     }
12111   }
12112
12113   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12114   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12115     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12116                               N0.getOperand(0), N1);
12117     AddToWorklist(Tmp.getNode());
12118     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12119                        Tmp, N0.getOperand(1));
12120   }
12121
12122   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12123     return NewVSel;
12124
12125   return SDValue();
12126 }
12127
12128 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12129   SDValue N0 = N->getOperand(0);
12130   EVT VT = N->getValueType(0);
12131   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12132   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12133
12134   // fold (fp_round_inreg c1fp) -> c1fp
12135   if (N0CFP && isTypeLegal(EVT)) {
12136     SDLoc DL(N);
12137     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12138     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12139   }
12140
12141   return SDValue();
12142 }
12143
12144 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12145   SDValue N0 = N->getOperand(0);
12146   EVT VT = N->getValueType(0);
12147
12148   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
12149   if (N->hasOneUse() &&
12150       N->use_begin()->getOpcode() == ISD::FP_ROUND)
12151     return SDValue();
12152
12153   // fold (fp_extend c1fp) -> c1fp
12154   if (isConstantFPBuildVectorOrConstantFP(N0))
12155     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12156
12157   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12158   if (N0.getOpcode() == ISD::FP16_TO_FP &&
12159       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12160     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12161
12162   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
12163   // value of X.
12164   if (N0.getOpcode() == ISD::FP_ROUND
12165       && N0.getConstantOperandVal(1) == 1) {
12166     SDValue In = N0.getOperand(0);
12167     if (In.getValueType() == VT) return In;
12168     if (VT.bitsLT(In.getValueType()))
12169       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12170                          In, N0.getOperand(1));
12171     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12172   }
12173
12174   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12175   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12176        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12177     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12178     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12179                                      LN0->getChain(),
12180                                      LN0->getBasePtr(), N0.getValueType(),
12181                                      LN0->getMemOperand());
12182     CombineTo(N, ExtLoad);
12183     CombineTo(N0.getNode(),
12184               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12185                           N0.getValueType(), ExtLoad,
12186                           DAG.getIntPtrConstant(1, SDLoc(N0))),
12187               ExtLoad.getValue(1));
12188     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12189   }
12190
12191   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12192     return NewVSel;
12193
12194   return SDValue();
12195 }
12196
12197 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12198   SDValue N0 = N->getOperand(0);
12199   EVT VT = N->getValueType(0);
12200
12201   // fold (fceil c1) -> fceil(c1)
12202   if (isConstantFPBuildVectorOrConstantFP(N0))
12203     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12204
12205   return SDValue();
12206 }
12207
12208 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12209   SDValue N0 = N->getOperand(0);
12210   EVT VT = N->getValueType(0);
12211
12212   // fold (ftrunc c1) -> ftrunc(c1)
12213   if (isConstantFPBuildVectorOrConstantFP(N0))
12214     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12215
12216   // fold ftrunc (known rounded int x) -> x
12217   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
12218   // likely to be generated to extract integer from a rounded floating value.
12219   switch (N0.getOpcode()) {
12220   default: break;
12221   case ISD::FRINT:
12222   case ISD::FTRUNC:
12223   case ISD::FNEARBYINT:
12224   case ISD::FFLOOR:
12225   case ISD::FCEIL:
12226     return N0;
12227   }
12228
12229   return SDValue();
12230 }
12231
12232 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12233   SDValue N0 = N->getOperand(0);
12234   EVT VT = N->getValueType(0);
12235
12236   // fold (ffloor c1) -> ffloor(c1)
12237   if (isConstantFPBuildVectorOrConstantFP(N0))
12238     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12239
12240   return SDValue();
12241 }
12242
12243 // FIXME: FNEG and FABS have a lot in common; refactor.
12244 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12245   SDValue N0 = N->getOperand(0);
12246   EVT VT = N->getValueType(0);
12247
12248   // Constant fold FNEG.
12249   if (isConstantFPBuildVectorOrConstantFP(N0))
12250     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12251
12252   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12253                          &DAG.getTarget().Options))
12254     return GetNegatedExpression(N0, DAG, LegalOperations);
12255
12256   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12257   // constant pool values.
12258   if (!TLI.isFNegFree(VT) &&
12259       N0.getOpcode() == ISD::BITCAST &&
12260       N0.getNode()->hasOneUse()) {
12261     SDValue Int = N0.getOperand(0);
12262     EVT IntVT = Int.getValueType();
12263     if (IntVT.isInteger() && !IntVT.isVector()) {
12264       APInt SignMask;
12265       if (N0.getValueType().isVector()) {
12266         // For a vector, get a mask such as 0x80... per scalar element
12267         // and splat it.
12268         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12269         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12270       } else {
12271         // For a scalar, just generate 0x80...
12272         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12273       }
12274       SDLoc DL0(N0);
12275       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12276                         DAG.getConstant(SignMask, DL0, IntVT));
12277       AddToWorklist(Int.getNode());
12278       return DAG.getBitcast(VT, Int);
12279     }
12280   }
12281
12282   // (fneg (fmul c, x)) -> (fmul -c, x)
12283   if (N0.getOpcode() == ISD::FMUL &&
12284       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12285     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12286     if (CFP1) {
12287       APFloat CVal = CFP1->getValueAPF();
12288       CVal.changeSign();
12289       if (Level >= AfterLegalizeDAG &&
12290           (TLI.isFPImmLegal(CVal, VT) ||
12291            TLI.isOperationLegal(ISD::ConstantFP, VT)))
12292         return DAG.getNode(
12293             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12294             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12295             N0->getFlags());
12296     }
12297   }
12298
12299   return SDValue();
12300 }
12301
12302 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12303                             APFloat (*Op)(const APFloat &, const APFloat &)) {
12304   SDValue N0 = N->getOperand(0);
12305   SDValue N1 = N->getOperand(1);
12306   EVT VT = N->getValueType(0);
12307   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12308   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12309
12310   if (N0CFP && N1CFP) {
12311     const APFloat &C0 = N0CFP->getValueAPF();
12312     const APFloat &C1 = N1CFP->getValueAPF();
12313     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12314   }
12315
12316   // Canonicalize to constant on RHS.
12317   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12318       !isConstantFPBuildVectorOrConstantFP(N1))
12319     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12320
12321   return SDValue();
12322 }
12323
12324 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12325   return visitFMinMax(DAG, N, minnum);
12326 }
12327
/// Combine an FMAXNUM node by delegating to visitFMinMax, passing maxnum as
/// the function used to fold two constant operands.
12328 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12329   return visitFMinMax(DAG, N, maxnum);
12330 }
12331
/// Combine an FMINIMUM node by delegating to visitFMinMax, passing minimum as
/// the function used to fold two constant operands.
12332 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12333   return visitFMinMax(DAG, N, minimum);
12334 }
12335
/// Combine an FMAXIMUM node by delegating to visitFMinMax, passing maximum as
/// the function used to fold two constant operands.
12336 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12337   return visitFMinMax(DAG, N, maximum);
12338 }
12339
12340 SDValue DAGCombiner::visitFABS(SDNode *N) {
12341   SDValue N0 = N->getOperand(0);
12342   EVT VT = N->getValueType(0);
12343
12344   // fold (fabs c1) -> fabs(c1)
12345   if (isConstantFPBuildVectorOrConstantFP(N0))
12346     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12347
12348   // fold (fabs (fabs x)) -> (fabs x)
12349   if (N0.getOpcode() == ISD::FABS)
12350     return N->getOperand(0);
12351
12352   // fold (fabs (fneg x)) -> (fabs x)
12353   // fold (fabs (fcopysign x, y)) -> (fabs x)
12354   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12355     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12356
12357   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12358   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12359     SDValue Int = N0.getOperand(0);
12360     EVT IntVT = Int.getValueType();
12361     if (IntVT.isInteger() && !IntVT.isVector()) {
12362       APInt SignMask;
12363       if (N0.getValueType().isVector()) {
12364         // For a vector, get a mask such as 0x7f... per scalar element
12365         // and splat it.
12366         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12367         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12368       } else {
12369         // For a scalar, just generate 0x7f...
12370         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12371       }
12372       SDLoc DL(N0);
12373       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12374                         DAG.getConstant(SignMask, DL, IntVT));
12375       AddToWorklist(Int.getNode());
12376       return DAG.getBitcast(N->getValueType(0), Int);
12377     }
12378   }
12379
12380   return SDValue();
12381 }
12382
12383 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12384   SDValue Chain = N->getOperand(0);
12385   SDValue N1 = N->getOperand(1);
12386   SDValue N2 = N->getOperand(2);
12387
12388   // If N is a constant we could fold this into a fallthrough or unconditional
12389   // branch. However that doesn't happen very often in normal code, because
12390   // Instcombine/SimplifyCFG should have handled the available opportunities.
12391   // If we did this folding here, it would be necessary to update the
12392   // MachineBasicBlock CFG, which is awkward.
12393
12394   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12395   // on the target.
12396   if (N1.getOpcode() == ISD::SETCC &&
12397       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12398                                    N1.getOperand(0).getValueType())) {
12399     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12400                        Chain, N1.getOperand(2),
12401                        N1.getOperand(0), N1.getOperand(1), N2);
12402   }
12403
12404   if (N1.hasOneUse()) {
12405     if (SDValue NewN1 = rebuildSetCC(N1))
12406       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12407   }
12408
12409   return SDValue();
12410 }
12411
/// Try to rebuild the branch condition \p N as a SETCC node.
///
/// Two shapes are handled: an SRL (optionally looked through a TRUNCATE whose
/// operand is a single-use SRL) that shifts down the one bit selected by an
/// AND mask, and an XOR. Returns the replacement condition, or an empty
/// SDValue when no pattern applies. In the XOR case the result may instead be
/// whatever visitXOR simplified the node to, if that is no longer an XOR.
12412 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12413   if (N.getOpcode() == ISD::SRL ||
12414       (N.getOpcode() == ISD::TRUNCATE &&
12415        (N.getOperand(0).hasOneUse() &&
12416         N.getOperand(0).getOpcode() == ISD::SRL))) {
12417     // Look past the truncate.
12418     if (N.getOpcode() == ISD::TRUNCATE)
12419       N = N.getOperand(0);
12420
12421     // Match this pattern so that we can generate simpler code:
12422     //
12423     //   %a = ...
12424     //   %b = and i32 %a, 2
12425     //   %c = srl i32 %b, 1
12426     //   brcond i32 %c ...
12427     //
12428     // into
12429     //
12430     //   %a = ...
12431     //   %b = and i32 %a, 2
12432     //   %c = setcc ne %b, 0
12433     //   brcond %c ...
12434     //
12435     // This applies only when the AND constant value has one bit set and the
12436     // SRL constant is equal to the log2 of the AND constant. The back-end is
12437     // smart enough to convert the result into a TEST/JMP sequence.
12438     SDValue Op0 = N.getOperand(0);
12439     SDValue Op1 = N.getOperand(1);
12440
12441     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12442       SDValue AndOp1 = Op0.getOperand(1);
12443
12444       if (AndOp1.getOpcode() == ISD::Constant) {
12445         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12446
12447         if (AndConst.isPowerOf2() &&
12448             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12449           SDLoc DL(N);
12450           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12451                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12452                               ISD::SETNE);
12453         }
12454       }
12455     }
12456   }
12457
12458   // Transform br(xor(x, y)) -> br(x != y)
12459   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12460   if (N.getOpcode() == ISD::XOR) {
12461     // Because we may call this on a speculatively constructed
12462     // SimplifiedSetCC Node, we need to simplify this node first.
12463     // Ideally this should be folded into SimplifySetCC and not
12464     // here. For now, grab a handle to N so we don't lose it from
12465     // replacements internal to the visit.
12466     HandleSDNode XORHandle(N);
12467     while (N.getOpcode() == ISD::XOR) {
12468       SDValue Tmp = visitXOR(N.getNode());
12469       // No simplification done.
12470       if (!Tmp.getNode())
12471         break;
12472       // Getting N back means an in-visit replacement happened that may have
12473       // invalidated N. Grab the current value from the handle.
12474       if (Tmp.getNode() == N.getNode())
12475         N = XORHandle.getValue();
12476       else // Node simplified. Try simplifying again.
12477         N = Tmp;
12478     }
12479
12480     if (N.getOpcode() != ISD::XOR)
12481       return N;
12482
12483     SDNode *TheXor = N.getNode();
12484
12485     SDValue Op0 = TheXor->getOperand(0);
12486     SDValue Op1 = TheXor->getOperand(1);
12487
12488     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12489       bool Equal = false;
            // NOTE(review): this condition asks for Op0 to be a constant one and
            // an XOR node at the same time, so the SETEQ form looks unreachable
            // as written -- confirm against isOneConstant before relying on it.
12490       if (isOneConstant(Op0) && Op0.hasOneUse() &&
12491           Op0.getOpcode() == ISD::XOR) {
12492         TheXor = Op0.getNode();
12493         Equal = true;
12494       }
12495
12496       EVT SetCCVT = N.getValueType();
12497       if (LegalTypes)
12498         SetCCVT = getSetCCResultType(SetCCVT);
12499       // Replace the uses of XOR with SETCC
12500       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12501                           Equal ? ISD::SETEQ : ISD::SETNE);
12502     }
12503   }
12504
12505   return SDValue();
12506 }
12507
12508 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12509 //
12510 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12511   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12512   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12513
12514   // If N is a constant we could fold this into a fallthrough or unconditional
12515   // branch. However that doesn't happen very often in normal code, because
12516   // Instcombine/SimplifyCFG should have handled the available opportunities.
12517   // If we did this folding here, it would be necessary to update the
12518   // MachineBasicBlock CFG, which is awkward.
12519
12520   // Use SimplifySetCC to simplify SETCC's.
12521   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12522                                CondLHS, CondRHS, CC->get(), SDLoc(N),
12523                                false);
12524   if (Simp.getNode()) AddToWorklist(Simp.getNode());
12525
12526   // fold to a simpler setcc
12527   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12528     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12529                        N->getOperand(0), Simp.getOperand(2),
12530                        Simp.getOperand(0), Simp.getOperand(1),
12531                        N->getOperand(4));
12532
12533   return SDValue();
12534 }
12535
12536 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12537 /// and that N may be folded in the load / store addressing mode.
12538 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12539                                     SelectionDAG &DAG,
12540                                     const TargetLowering &TLI) {
12541   EVT VT;
12542   unsigned AS;
12543
12544   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
12545     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12546       return false;
12547     VT = LD->getMemoryVT();
12548     AS = LD->getAddressSpace();
12549   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
12550     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12551       return false;
12552     VT = ST->getMemoryVT();
12553     AS = ST->getAddressSpace();
12554   } else
12555     return false;
12556
12557   TargetLowering::AddrMode AM;
12558   if (N->getOpcode() == ISD::ADD) {
12559     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12560     if (Offset)
12561       // [reg +/- imm]
12562       AM.BaseOffs = Offset->getSExtValue();
12563     else
12564       // [reg +/- reg]
12565       AM.Scale = 1;
12566   } else if (N->getOpcode() == ISD::SUB) {
12567     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12568     if (Offset)
12569       // [reg +/- imm]
12570       AM.BaseOffs = -Offset->getSExtValue();
12571     else
12572       // [reg +/- reg]
12573       AM.Scale = 1;
12574   } else
12575     return false;
12576
12577   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12578                                    VT.getTypeForEVT(*DAG.getContext()), AS);
12579 }
12580
12581 /// Try turning a load/store into a pre-indexed load/store when the base
12582 /// pointer is an add or subtract and it has other uses besides the load/store.
12583 /// After the transformation, the new indexed load/store has effectively folded
12584 /// the add/subtract in and all of its other uses are redirected to the
12585 /// new load/store.
/// Returns true if \p N was replaced by an indexed node (N and the old pointer
/// computation are then deleted), false if the combine was not performed.
12586 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
12587   if (Level < AfterLegalizeDAG)
12588     return false;
12589
      // N must be an unindexed load or store whose memory type has a legal
      // PRE_INC or PRE_DEC form on the target.
12590   bool isLoad = true;
12591   SDValue Ptr;
12592   EVT VT;
12593   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
12594     if (LD->isIndexed())
12595       return false;
12596     VT = LD->getMemoryVT();
12597     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
12598         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
12599       return false;
12600     Ptr = LD->getBasePtr();
12601   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
12602     if (ST->isIndexed())
12603       return false;
12604     VT = ST->getMemoryVT();
12605     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
12606         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
12607       return false;
12608     Ptr = ST->getBasePtr();
12609     isLoad = false;
12610   } else {
12611     return false;
12612   }
12613
12614   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
12615   // out.  There is no reason to make this a preinc/predec.
12616   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
12617       Ptr.getNode()->hasOneUse())
12618     return false;
12619
12620   // Ask the target to do addressing mode selection.
12621   SDValue BasePtr;
12622   SDValue Offset;
12623   ISD::MemIndexedMode AM = ISD::UNINDEXED;
12624   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
12625     return false;
12626
12627   // Backends without true r+i pre-indexed forms may need to pass a
12628   // constant base with a variable offset so that constant coercion
12629   // will work with the patterns in canonical form.
12630   bool Swapped = false;
12631   if (isa<ConstantSDNode>(BasePtr)) {
12632     std::swap(BasePtr, Offset);
12633     Swapped = true;
12634   }
12635
12636   // Don't create an indexed load / store with zero offset.
12637   if (isNullConstant(Offset))
12638     return false;
12639
12640   // Try turning it into a pre-indexed load / store except when:
12641   // 1) The new base ptr is a frame index.
12642   // 2) If N is a store and the new base ptr is either the same as or is a
12643   //    predecessor of the value being stored.
12644   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
12645   //    that would create a cycle.
12646   // 4) All uses are load / store ops that use it as old base ptr.
12647
12648   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
12649   // (plus the implicit offset) to a register to preinc anyway.
      // A RegisterSDNode base is rejected by the same test.
12650   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12651     return false;
12652
12653   // Check #2.
12654   if (!isLoad) {
12655     SDValue Val = cast<StoreSDNode>(N)->getValue();
12656     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
12657       return false;
12658   }
12659
12660   // Caches for hasPredecessorHelper.
12661   SmallPtrSet<const SDNode *, 32> Visited;
12662   SmallVector<const SDNode *, 16> Worklist;
12663   Worklist.push_back(N);
12664
12665   // If the offset is a constant, there may be other adds of constants that
12666   // can be folded with this one. We should do this to avoid having to keep
12667   // a copy of the original base pointer.
      // OtherUses collects those add/sub users of BasePtr; it is emptied again
      // as soon as one remaining user does not fit the pattern.
12668   SmallVector<SDNode *, 16> OtherUses;
12669   if (isa<ConstantSDNode>(Offset))
12670     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
12671                               UE = BasePtr.getNode()->use_end();
12672          UI != UE; ++UI) {
12673       SDUse &Use = UI.getUse();
12674       // Skip the use that is Ptr and uses of other results from BasePtr's
12675       // node (important for nodes that return multiple results).
12676       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
12677         continue;
12678
12679       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
12680         continue;
12681
12682       if (Use.getUser()->getOpcode() != ISD::ADD &&
12683           Use.getUser()->getOpcode() != ISD::SUB) {
12684         OtherUses.clear();
12685         break;
12686       }
12687
            // The user's other operand (operand index flipped between 0 and 1).
12688       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
12689       if (!isa<ConstantSDNode>(Op1)) {
12690         OtherUses.clear();
12691         break;
12692       }
12693
12694       // FIXME: In some cases, we can be smarter about this.
12695       if (Op1.getValueType() != Offset.getValueType()) {
12696         OtherUses.clear();
12697         break;
12698       }
12699
12700       OtherUses.push_back(Use.getUser());
12701     }
12702
      // Undo the earlier swap so BasePtr/Offset are passed to the indexed node
      // in the order the target returned them.
12703   if (Swapped)
12704     std::swap(BasePtr, Offset);
12705
12706   // Now check for #3 and #4.
12707   bool RealUse = false;
12708
12709   for (SDNode *Use : Ptr.getNode()->uses()) {
12710     if (Use == N)
12711       continue;
12712     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
12713       return false;
12714
12715     // If Ptr may be folded in addressing mode of other use, then it's
12716     // not profitable to do this transformation.
12717     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
12718       RealUse = true;
12719   }
12720
12721   if (!RealUse)
12722     return false;
12723
12724   SDValue Result;
12725   if (isLoad)
12726     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12727                                 BasePtr, Offset, AM);
12728   else
12729     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12730                                  BasePtr, Offset, AM);
12731   ++PreIndexedNodes;
12732   ++NodesCombined;
12733   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
12734              Result.getNode()->dump(&DAG); dbgs() << '\n');
12735   WorklistRemover DeadNodes(*this);
      // Redirect N's results to the new node. For a load, result 0 of N maps to
      // result 0 and result 1 of N to result 2; for a store, result 0 of N maps
      // to result 1. The remaining result is used below to replace Ptr.
12736   if (isLoad) {
12737     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12738     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12739   } else {
12740     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12741   }
12742
12743   // Finally, since the node is now dead, remove it from the graph.
12744   deleteAndRecombine(N);
12745
      // Swap once more so BasePtr/Offset have the same roles as when OtherUses
      // was collected (Offset is cast to a constant below).
12746   if (Swapped)
12747     std::swap(BasePtr, Offset);
12748
12749   // Replace other uses of BasePtr that can be updated to use Ptr
12750   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
        // OffsetIdx is the operand index of the constant; the other operand
        // must be BasePtr.
12751     unsigned OffsetIdx = 1;
12752     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
12753       OffsetIdx = 0;
12754     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
12755            BasePtr.getNode() && "Expected BasePtr operand");
12756
12757     // We need to replace ptr0 in the following expression:
12758     //   x0 * offset0 + y0 * ptr0 = t0
12759     // knowing that
12760     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
12761     //
12762     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
12763     // indexed load/store and the expression that needs to be re-written.
12764     //
12765     // Therefore, we have:
12766     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
12767
12768     ConstantSDNode *CN =
12769       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
12770     int X0, X1, Y0, Y1;
12771     const APInt &Offset0 = CN->getAPIntValue();
12772     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
12773
12774     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
12775     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
12776     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
12777     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
12778
12779     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
12780
        // CNV = x0 * offset0 - x1 * y0 * y1 * offset1, the new constant term.
12781     APInt CNV = Offset0;
12782     if (X0 < 0) CNV = -CNV;
12783     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
12784     else CNV = CNV - Offset1;
12785
12786     SDLoc DL(OtherUses[i]);
12787
12788     // We can now generate the new expression.
12789     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
12790     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
12791
12792     SDValue NewUse = DAG.getNode(Opcode,
12793                                  DL,
12794                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
12795     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
12796     deleteAndRecombine(OtherUses[i]);
12797   }
12798
12799   // Replace the uses of Ptr with uses of the updated base value.
12800   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
12801   deleteAndRecombine(Ptr.getNode());
12802   AddToWorklist(Result.getNode());
12803
12804   return true;
12805 }
12806
12807 /// Try to combine a load/store with a add/sub of the base pointer node into a
12808 /// post-indexed load/store. The transformation folded the add/subtract into the
12809 /// new indexed load/store effectively and all of its uses are redirected to the
12810 /// new load/store.
/// Returns true if \p N was replaced by an indexed node (N and the folded
/// add/sub are then deleted), false if the combine was not performed.
12811 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
12812   if (Level < AfterLegalizeDAG)
12813     return false;
12814
      // N must be an unindexed load or store whose memory type has a legal
      // POST_INC or POST_DEC form on the target.
12815   bool isLoad = true;
12816   SDValue Ptr;
12817   EVT VT;
12818   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
12819     if (LD->isIndexed())
12820       return false;
12821     VT = LD->getMemoryVT();
12822     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
12823         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
12824       return false;
12825     Ptr = LD->getBasePtr();
12826   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
12827     if (ST->isIndexed())
12828       return false;
12829     VT = ST->getMemoryVT();
12830     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
12831         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
12832       return false;
12833     Ptr = ST->getBasePtr();
12834     isLoad = false;
12835   } else {
12836     return false;
12837   }
12838
      // With a single use there is no separate add/sub of Ptr to fold.
12839   if (Ptr.getNode()->hasOneUse())
12840     return false;
12841
      // Consider each add/sub user of Ptr (other than N) as the candidate to
      // fold; the first one that passes every check is used.
12842   for (SDNode *Op : Ptr.getNode()->uses()) {
12843     if (Op == N ||
12844         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
12845       continue;
12846
12847     SDValue BasePtr;
12848     SDValue Offset;
12849     ISD::MemIndexedMode AM = ISD::UNINDEXED;
12850     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
12851       // Don't create an indexed load / store with zero offset.
12852       if (isNullConstant(Offset))
12853         continue;
12854
12855       // Try turning it into a post-indexed load / store except when
12856       // 1) All uses are load / store ops that use it as base ptr (and
12857       //    it may be folded as addressing mode).
12858       // 2) Op must be independent of N, i.e. Op is neither a predecessor
12859       //    nor a successor of N. Otherwise, if Op is folded that would
12860       //    create a cycle.
12861
            // Frame-index and register bases are never post-indexed.
12862       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12863         continue;
12864
12865       // Check for #1.
12866       bool TryNext = false;
12867       for (SDNode *Use : BasePtr.getNode()->uses()) {
12868         if (Use == Ptr.getNode())
12869           continue;
12870
12871         // If all the uses are load / store addresses, then don't do the
12872         // transformation.
12873         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
12874           bool RealUse = false;
12875           for (SDNode *UseUse : Use->uses()) {
12876             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
12877               RealUse = true;
12878           }
12879
12880           if (!RealUse) {
12881             TryNext = true;
12882             break;
12883           }
12884         }
12885       }
12886
12887       if (TryNext)
12888         continue;
12889
12890       // Check for #2.
12891       SmallPtrSet<const SDNode *, 32> Visited;
12892       SmallVector<const SDNode *, 8> Worklist;
12893       // Ptr is predecessor to both N and Op.
12894       Visited.insert(Ptr.getNode());
12895       Worklist.push_back(N);
12896       Worklist.push_back(Op);
12897       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
12898           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
12899         SDValue Result = isLoad
12900           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12901                                BasePtr, Offset, AM)
12902           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12903                                 BasePtr, Offset, AM);
12904         ++PostIndexedNodes;
12905         ++NodesCombined;
12906         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
12907                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
12908                    dbgs() << '\n');
12909         WorklistRemover DeadNodes(*this);
              // Redirect N's results to the new node. For a load, result 0 of N
              // maps to result 0 and result 1 of N to result 2; for a store,
              // result 0 of N maps to result 1.
12910         if (isLoad) {
12911           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12912           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12913         } else {
12914           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12915         }
12916
12917         // Finally, since the node is now dead, remove it from the graph.
12918         deleteAndRecombine(N);
12919
12920         // Replace the uses of Op with uses of the updated base value.
12921         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
12922                                       Result.getValue(isLoad ? 1 : 0));
12923         deleteAndRecombine(Op);
12924         return true;
12925       }
12926     }
12927   }
12928
12929   return false;
12930 }
12931
12932 /// Return the base-pointer arithmetic from an indexed \p LD.
12933 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12934   ISD::MemIndexedMode AM = LD->getAddressingMode();
12935   assert(AM != ISD::UNINDEXED);
12936   SDValue BP = LD->getOperand(1);
12937   SDValue Inc = LD->getOperand(2);
12938
12939   // Some backends use TargetConstants for load offsets, but don't expect
12940   // TargetConstants in general ADD nodes. We can convert these constants into
12941   // regular Constants (if the constant is not opaque).
12942   assert((Inc.getOpcode() != ISD::TargetConstant ||
12943           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12944          "Cannot split out indexing using opaque target constants");
12945   if (Inc.getOpcode() == ISD::TargetConstant) {
12946     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12947     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12948                           ConstInc->getValueType(0));
12949   }
12950
12951   unsigned Opc =
12952       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12953   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12954 }
12955
/// Number of vector elements of \p T, or 0 when \p T is not a vector type.
12956 static inline int numVectorEltsOrZero(EVT T) {
12957   return !T.isVector() ? 0 : T.getVectorNumElements();
12958 }
12959
12960 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
12961   Val = ST->getValue();
12962   EVT STType = Val.getValueType();
12963   EVT STMemType = ST->getMemoryVT();
12964   if (STType == STMemType)
12965     return true;
12966   if (isTypeLegal(STMemType))
12967     return false; // fail.
12968   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
12969       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
12970     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
12971     return true;
12972   }
12973   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
12974       STType.isInteger() && STMemType.isInteger()) {
12975     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
12976     return true;
12977   }
12978   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
12979     Val = DAG.getBitcast(STMemType, Val);
12980     return true;
12981   }
12982   return false; // fail.
12983 }
12984
12985 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
12986   EVT LDMemType = LD->getMemoryVT();
12987   EVT LDType = LD->getValueType(0);
12988   assert(Val.getValueType() == LDMemType &&
12989          "Attempting to extend value of non-matching type");
12990   if (LDType == LDMemType)
12991     return true;
12992   if (LDMemType.isInteger() && LDType.isInteger()) {
12993     switch (LD->getExtensionType()) {
12994     case ISD::NON_EXTLOAD:
12995       Val = DAG.getBitcast(LDType, Val);
12996       return true;
12997     case ISD::EXTLOAD:
12998       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
12999       return true;
13000     case ISD::SEXTLOAD:
13001       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13002       return true;
13003     case ISD::ZEXTLOAD:
13004       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13005       return true;
13006     }
13007   }
13008   return false;
13009 }
13010
/// Try to replace the load \p LD with the value written by the store that is
/// its chain operand, when both address the same base/index and the stored
/// bits cover the loaded bits. Nothing is done when OptLevel is
/// CodeGenOpt::None or when either access is volatile.
/// Returns the result of CombineTo on success, an empty SDValue otherwise.
13011 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13012   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13013     return SDValue();
13014   SDValue Chain = LD->getOperand(0);
13015   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13016   if (!ST || ST->isVolatile())
13017     return SDValue();
13018
13019   EVT LDType = LD->getValueType(0);
13020   EVT LDMemType = LD->getMemoryVT();
13021   EVT STMemType = ST->getMemoryVT();
13022   EVT STType = ST->getValue().getValueType();
13023
      // Both accesses must share base and index; Offset receives the distance
      // computed by equalBaseIndex.
13024   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13025   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13026   int64_t Offset;
13027   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13028     return SDValue();
13029
13030   // Normalize for Endianness. After this Offset=0 will denote that the least
13031   // significant bit in the loaded value maps to the least significant bit in
13032   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
13033   // n:th least significant byte of the stored value.
13034   if (DAG.getDataLayout().isBigEndian())
13035     Offset = (STMemType.getStoreSizeInBits() -
13036               LDMemType.getStoreSizeInBits()) / 8 - Offset;
13037
13038   // Check that the stored value covers all bits that are loaded.
13039   bool STCoversLD =
13040       (Offset >= 0) &&
13041       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13042
      // Replace LD with Val and Chain. For an indexed load the updated pointer
      // result is rebuilt as an explicit ADD/SUB of operands 1 and 2.
13043   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13044     if (LD->isIndexed()) {
13045       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13046                     LD->getAddressingMode() == ISD::POST_DEC);
13047       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13048       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13049                              LD->getOperand(1), LD->getOperand(2));
13050       SDValue Ops[] = {Val, Idx, Chain};
13051       return CombineTo(LD, Ops, 3);
13052     }
13053     return CombineTo(LD, Val, Chain);
13054   };
13055
13056   if (!STCoversLD)
13057     return SDValue();
13058
13059   // Memory as copy space (potentially masked).
13060   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13061     // Simple case: Direct non-truncating forwarding
13062     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13063       return ReplaceLd(LD, ST->getValue(), Chain);
13064     // Can we model the truncate and extension with an and mask?
13065     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13066         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13067       // Mask to size of LDMemType
13068       auto Mask =
13069           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13070                                                STMemType.getSizeInBits()),
13071                           SDLoc(ST), STType);
13072       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13073       return ReplaceLd(LD, Val, Chain);
13074     }
13075   }
13076
13077   // TODO: Deal with nonzero offset.
13078   if (LD->getBasePtr().isUndef() || Offset != 0)
13079     return SDValue();
13080   // Model necessary truncations / extensions.
13081   SDValue Val;
13082   // Truncate Value To Stored Memory Size.
      // The do/while(false) body runs once; each 'continue' leaves it and falls
      // through to the cleanup below.
13083   do {
13084     if (!getTruncatedStoreValue(ST, Val))
13085       continue;
13086     if (!isTypeLegal(LDMemType))
13087       continue;
13088     if (STMemType != LDMemType) {
13089       // TODO: Support vectors? This requires extract_subvector/bitcast.
13090       if (!STMemType.isVector() && !LDMemType.isVector() &&
13091           STMemType.isInteger() && LDMemType.isInteger())
13092         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13093       else
13094         continue;
13095     }
13096     if (!extendLoadedValueToExtension(LD, Val))
13097       continue;
13098     return ReplaceLd(LD, Val, Chain);
13099   } while (false);
13100
13101   // On failure, cleanup dead nodes we may have created.
13102   if (Val->use_empty())
13103     deleteAndRecombine(Val.getNode());
13104   return SDValue();
13105 }
13106
13107 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13108   LoadSDNode *LD  = cast<LoadSDNode>(N);
13109   SDValue Chain = LD->getChain();
13110   SDValue Ptr   = LD->getBasePtr();
13111
13112   // If load is not volatile and there are no uses of the loaded value (and
13113   // the updated indexed value in case of indexed loads), change uses of the
13114   // chain value into uses of the chain input (i.e. delete the dead load).
13115   if (!LD->isVolatile()) {
13116     if (N->getValueType(1) == MVT::Other) {
13117       // Unindexed loads.
13118       if (!N->hasAnyUseOfValue(0)) {
13119         // It's not safe to use the two value CombineTo variant here. e.g.
13120         // v1, chain2 = load chain1, loc
13121         // v2, chain3 = load chain2, loc
13122         // v3         = add v2, c
13123         // Now we replace use of chain2 with chain1.  This makes the second load
13124         // isomorphic to the one we are deleting, and thus makes this load live.
13125         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13126                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13127                    dbgs() << "\n");
13128         WorklistRemover DeadNodes(*this);
13129         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13130         AddUsersToWorklist(Chain.getNode());
13131         if (N->use_empty())
13132           deleteAndRecombine(N);
13133
13134         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13135       }
13136     } else {
13137       // Indexed loads.
13138       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13139
13140       // If this load has an opaque TargetConstant offset, then we cannot split
13141       // the indexing into an add/sub directly (that TargetConstant may not be
13142       // valid for a different type of node, and we cannot convert an opaque
13143       // target constant into a regular constant).
13144       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13145                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13146
13147       if (!N->hasAnyUseOfValue(0) &&
13148           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13149         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13150         SDValue Index;
13151         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13152           Index = SplitIndexingFromLoad(LD);
13153           // Try to fold the base pointer arithmetic into subsequent loads and
13154           // stores.
13155           AddUsersToWorklist(N);
13156         } else
13157           Index = DAG.getUNDEF(N->getValueType(1));
13158         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13159                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13160                    dbgs() << " and 2 other values\n");
13161         WorklistRemover DeadNodes(*this);
13162         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13163         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13164         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13165         deleteAndRecombine(N);
13166         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13167       }
13168     }
13169   }
13170
13171   // If this load is directly stored, replace the load value with the stored
13172   // value.
13173   if (auto V = ForwardStoreValueToDirectLoad(LD))
13174     return V;
13175
13176   // Try to infer better alignment information than the load already has.
13177   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13178     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13179       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13180         SDValue NewLoad = DAG.getExtLoad(
13181             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13182             LD->getPointerInfo(), LD->getMemoryVT(), Align,
13183             LD->getMemOperand()->getFlags(), LD->getAAInfo());
13184         // NewLoad will always be N as we are only refining the alignment
13185         assert(NewLoad.getNode() == N);
13186         (void)NewLoad;
13187       }
13188     }
13189   }
13190
13191   if (LD->isUnindexed()) {
13192     // Walk up chain skipping non-aliasing memory nodes.
13193     SDValue BetterChain = FindBetterChain(N, Chain);
13194
13195     // If there is a better chain.
13196     if (Chain != BetterChain) {
13197       SDValue ReplLoad;
13198
13199       // Replace the chain to void dependency.
13200       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13201         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13202                                BetterChain, Ptr, LD->getMemOperand());
13203       } else {
13204         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13205                                   LD->getValueType(0),
13206                                   BetterChain, Ptr, LD->getMemoryVT(),
13207                                   LD->getMemOperand());
13208       }
13209
13210       // Create token factor to keep old chain connected.
13211       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13212                                   MVT::Other, Chain, ReplLoad.getValue(1));
13213
13214       // Replace uses with load result and token factor
13215       return CombineTo(N, ReplLoad.getValue(0), Token);
13216     }
13217   }
13218
13219   // Try transforming N to an indexed load.
13220   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13221     return SDValue(N, 0);
13222
13223   // Try to slice up N to more direct loads if the slices are mapped to
13224   // different register banks or pairing can take place.
13225   if (SliceUpLoad(N))
13226     return SDValue(N, 0);
13227
13228   return SDValue();
13229 }
13230
13231 namespace {
13232
13233 /// Helper structure used to slice a load in smaller loads.
13234 /// Basically a slice is obtained from the following sequence:
13235 /// Origin = load Ty1, Base
13236 /// Shift = srl Ty1 Origin, CstTy Amount
13237 /// Inst = trunc Shift to Ty2
13238 ///
13239 /// Then, it will be rewritten into:
13240 /// Slice = load SliceTy, Base + SliceOffset
13241 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13242 ///
13243 /// SliceTy is deduced from the number of bits that are actually used to
13244 /// build Inst.
13245 struct LoadedSlice {
13246   /// Helper structure used to compute the cost of a slice.
13247   struct Cost {
13248     /// Are we optimizing for code size.
13249     bool ForCodeSize;
13250
13251     /// Various cost.
13252     unsigned Loads = 0;
13253     unsigned Truncates = 0;
13254     unsigned CrossRegisterBanksCopies = 0;
13255     unsigned ZExts = 0;
13256     unsigned Shift = 0;
13257
13258     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13259
13260     /// Get the cost of one isolated slice.
13261     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13262         : ForCodeSize(ForCodeSize), Loads(1) {
13263       EVT TruncType = LS.Inst->getValueType(0);
13264       EVT LoadedType = LS.getLoadedType();
13265       if (TruncType != LoadedType &&
13266           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13267         ZExts = 1;
13268     }
13269
13270     /// Account for slicing gain in the current cost.
13271     /// Slicing provide a few gains like removing a shift or a
13272     /// truncate. This method allows to grow the cost of the original
13273     /// load with the gain from this slice.
13274     void addSliceGain(const LoadedSlice &LS) {
13275       // Each slice saves a truncate.
13276       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13277       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13278                               LS.Inst->getValueType(0)))
13279         ++Truncates;
13280       // If there is a shift amount, this slice gets rid of it.
13281       if (LS.Shift)
13282         ++Shift;
13283       // If this slice can merge a cross register bank copy, account for it.
13284       if (LS.canMergeExpensiveCrossRegisterBankCopy())
13285         ++CrossRegisterBanksCopies;
13286     }
13287
13288     Cost &operator+=(const Cost &RHS) {
13289       Loads += RHS.Loads;
13290       Truncates += RHS.Truncates;
13291       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13292       ZExts += RHS.ZExts;
13293       Shift += RHS.Shift;
13294       return *this;
13295     }
13296
13297     bool operator==(const Cost &RHS) const {
13298       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13299              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13300              ZExts == RHS.ZExts && Shift == RHS.Shift;
13301     }
13302
13303     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13304
13305     bool operator<(const Cost &RHS) const {
13306       // Assume cross register banks copies are as expensive as loads.
13307       // FIXME: Do we want some more target hooks?
13308       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13309       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13310       // Unless we are optimizing for code size, consider the
13311       // expensive operation first.
13312       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13313         return ExpensiveOpsLHS < ExpensiveOpsRHS;
13314       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13315              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13316     }
13317
13318     bool operator>(const Cost &RHS) const { return RHS < *this; }
13319
13320     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13321
13322     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13323   };
13324
13325   // The last instruction that represent the slice. This should be a
13326   // truncate instruction.
13327   SDNode *Inst;
13328
13329   // The original load instruction.
13330   LoadSDNode *Origin;
13331
13332   // The right shift amount in bits from the original load.
13333   unsigned Shift;
13334
13335   // The DAG from which Origin came from.
13336   // This is used to get some contextual information about legal types, etc.
13337   SelectionDAG *DAG;
13338
13339   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13340               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13341       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13342
13343   /// Get the bits used in a chunk of bits \p BitWidth large.
13344   /// \return Result is \p BitWidth and has used bits set to 1 and
13345   ///         not used bits set to 0.
13346   APInt getUsedBits() const {
13347     // Reproduce the trunc(lshr) sequence:
13348     // - Start from the truncated value.
13349     // - Zero extend to the desired bit width.
13350     // - Shift left.
13351     assert(Origin && "No original load to compare against.");
13352     unsigned BitWidth = Origin->getValueSizeInBits(0);
13353     assert(Inst && "This slice is not bound to an instruction");
13354     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13355            "Extracted slice is bigger than the whole type!");
13356     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13357     UsedBits.setAllBits();
13358     UsedBits = UsedBits.zext(BitWidth);
13359     UsedBits <<= Shift;
13360     return UsedBits;
13361   }
13362
13363   /// Get the size of the slice to be loaded in bytes.
13364   unsigned getLoadedSize() const {
13365     unsigned SliceSize = getUsedBits().countPopulation();
13366     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13367     return SliceSize / 8;
13368   }
13369
13370   /// Get the type that will be loaded for this slice.
13371   /// Note: This may not be the final type for the slice.
13372   EVT getLoadedType() const {
13373     assert(DAG && "Missing context");
13374     LLVMContext &Ctxt = *DAG->getContext();
13375     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13376   }
13377
13378   /// Get the alignment of the load used for this slice.
13379   unsigned getAlignment() const {
13380     unsigned Alignment = Origin->getAlignment();
13381     unsigned Offset = getOffsetFromBase();
13382     if (Offset != 0)
13383       Alignment = MinAlign(Alignment, Alignment + Offset);
13384     return Alignment;
13385   }
13386
13387   /// Check if this slice can be rewritten with legal operations.
13388   bool isLegal() const {
13389     // An invalid slice is not legal.
13390     if (!Origin || !Inst || !DAG)
13391       return false;
13392
13393     // Offsets are for indexed load only, we do not handle that.
13394     if (!Origin->getOffset().isUndef())
13395       return false;
13396
13397     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13398
13399     // Check that the type is legal.
13400     EVT SliceType = getLoadedType();
13401     if (!TLI.isTypeLegal(SliceType))
13402       return false;
13403
13404     // Check that the load is legal for this type.
13405     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13406       return false;
13407
13408     // Check that the offset can be computed.
13409     // 1. Check its type.
13410     EVT PtrType = Origin->getBasePtr().getValueType();
13411     if (PtrType == MVT::Untyped || PtrType.isExtended())
13412       return false;
13413
13414     // 2. Check that it fits in the immediate.
13415     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13416       return false;
13417
13418     // 3. Check that the computation is legal.
13419     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13420       return false;
13421
13422     // Check that the zext is legal if it needs one.
13423     EVT TruncateType = Inst->getValueType(0);
13424     if (TruncateType != SliceType &&
13425         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13426       return false;
13427
13428     return true;
13429   }
13430
13431   /// Get the offset in bytes of this slice in the original chunk of
13432   /// bits.
13433   /// \pre DAG != nullptr.
13434   uint64_t getOffsetFromBase() const {
13435     assert(DAG && "Missing context.");
13436     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13437     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13438     uint64_t Offset = Shift / 8;
13439     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13440     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13441            "The size of the original loaded type is not a multiple of a"
13442            " byte.");
13443     // If Offset is bigger than TySizeInBytes, it means we are loading all
13444     // zeros. This should have been optimized before in the process.
13445     assert(TySizeInBytes > Offset &&
13446            "Invalid shift amount for given loaded size");
13447     if (IsBigEndian)
13448       Offset = TySizeInBytes - Offset - getLoadedSize();
13449     return Offset;
13450   }
13451
13452   /// Generate the sequence of instructions to load the slice
13453   /// represented by this object and redirect the uses of this slice to
13454   /// this new sequence of instructions.
13455   /// \pre this->Inst && this->Origin are valid Instructions and this
13456   /// object passed the legal check: LoadedSlice::isLegal returned true.
13457   /// \return The last instruction of the sequence used to load the slice.
13458   SDValue loadSlice() const {
13459     assert(Inst && Origin && "Unable to replace a non-existing slice.");
13460     const SDValue &OldBaseAddr = Origin->getBasePtr();
13461     SDValue BaseAddr = OldBaseAddr;
13462     // Get the offset in that chunk of bytes w.r.t. the endianness.
13463     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13464     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13465     if (Offset) {
13466       // BaseAddr = BaseAddr + Offset.
13467       EVT ArithType = BaseAddr.getValueType();
13468       SDLoc DL(Origin);
13469       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13470                               DAG->getConstant(Offset, DL, ArithType));
13471     }
13472
13473     // Create the type of the loaded slice according to its size.
13474     EVT SliceType = getLoadedType();
13475
13476     // Create the load for the slice.
13477     SDValue LastInst =
13478         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13479                      Origin->getPointerInfo().getWithOffset(Offset),
13480                      getAlignment(), Origin->getMemOperand()->getFlags());
13481     // If the final type is not the same as the loaded type, this means that
13482     // we have to pad with zero. Create a zero extend for that.
13483     EVT FinalType = Inst->getValueType(0);
13484     if (SliceType != FinalType)
13485       LastInst =
13486           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13487     return LastInst;
13488   }
13489
13490   /// Check if this slice can be merged with an expensive cross register
13491   /// bank copy. E.g.,
13492   /// i = load i32
13493   /// f = bitcast i32 i to float
13494   bool canMergeExpensiveCrossRegisterBankCopy() const {
13495     if (!Inst || !Inst->hasOneUse())
13496       return false;
13497     SDNode *Use = *Inst->use_begin();
13498     if (Use->getOpcode() != ISD::BITCAST)
13499       return false;
13500     assert(DAG && "Missing context");
13501     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13502     EVT ResVT = Use->getValueType(0);
13503     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13504     const TargetRegisterClass *ArgRC =
13505         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13506     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13507       return false;
13508
13509     // At this point, we know that we perform a cross-register-bank copy.
13510     // Check if it is expensive.
13511     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13512     // Assume bitcasts are cheap, unless both register classes do not
13513     // explicitly share a common sub class.
13514     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13515       return false;
13516
13517     // Check if it will be merged with the load.
13518     // 1. Check the alignment constraint.
13519     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13520         ResVT.getTypeForEVT(*DAG->getContext()));
13521
13522     if (RequiredAlignment > getAlignment())
13523       return false;
13524
13525     // 2. Check that the load is a legal operation for that type.
13526     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13527       return false;
13528
13529     // 3. Check that we do not have a zext in the way.
13530     if (Inst->getValueType(0) != getLoadedType())
13531       return false;
13532
13533     return true;
13534   }
13535 };
13536
13537 } // end anonymous namespace
13538
13539 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13540 /// \p UsedBits looks like 0..0 1..1 0..0.
13541 static bool areUsedBitsDense(const APInt &UsedBits) {
13542   // If all the bits are one, this is dense!
13543   if (UsedBits.isAllOnesValue())
13544     return true;
13545
13546   // Get rid of the unused bits on the right.
13547   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13548   // Get rid of the unused bits on the left.
13549   if (NarrowedUsedBits.countLeadingZeros())
13550     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13551   // Check that the chunk of bits is completely used.
13552   return NarrowedUsedBits.isAllOnesValue();
13553 }
13554
13555 /// Check whether or not \p First and \p Second are next to each other
13556 /// in memory. This means that there is no hole between the bits loaded
13557 /// by \p First and the bits loaded by \p Second.
13558 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13559                                      const LoadedSlice &Second) {
13560   assert(First.Origin == Second.Origin && First.Origin &&
13561          "Unable to match different memory origins.");
13562   APInt UsedBits = First.getUsedBits();
13563   assert((UsedBits & Second.getUsedBits()) == 0 &&
13564          "Slices are not supposed to overlap.");
13565   UsedBits |= Second.getUsedBits();
13566   return areUsedBitsDense(UsedBits);
13567 }
13568
13569 /// Adjust the \p GlobalLSCost according to the target
13570 /// paring capabilities and the layout of the slices.
13571 /// \pre \p GlobalLSCost should account for at least as many loads as
13572 /// there is in the slices in \p LoadedSlices.
13573 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13574                                  LoadedSlice::Cost &GlobalLSCost) {
13575   unsigned NumberOfSlices = LoadedSlices.size();
13576   // If there is less than 2 elements, no pairing is possible.
13577   if (NumberOfSlices < 2)
13578     return;
13579
13580   // Sort the slices so that elements that are likely to be next to each
13581   // other in memory are next to each other in the list.
13582   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13583     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13584     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13585   });
13586   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13587   // First (resp. Second) is the first (resp. Second) potentially candidate
13588   // to be placed in a paired load.
13589   const LoadedSlice *First = nullptr;
13590   const LoadedSlice *Second = nullptr;
13591   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13592                 // Set the beginning of the pair.
13593                                                            First = Second) {
13594     Second = &LoadedSlices[CurrSlice];
13595
13596     // If First is NULL, it means we start a new pair.
13597     // Get to the next slice.
13598     if (!First)
13599       continue;
13600
13601     EVT LoadedType = First->getLoadedType();
13602
13603     // If the types of the slices are different, we cannot pair them.
13604     if (LoadedType != Second->getLoadedType())
13605       continue;
13606
13607     // Check if the target supplies paired loads for this type.
13608     unsigned RequiredAlignment = 0;
13609     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13610       // move to the next pair, this type is hopeless.
13611       Second = nullptr;
13612       continue;
13613     }
13614     // Check if we meet the alignment requirement.
13615     if (RequiredAlignment > First->getAlignment())
13616       continue;
13617
13618     // Check that both loads are next to each other in memory.
13619     if (!areSlicesNextToEachOther(*First, *Second))
13620       continue;
13621
13622     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13623     --GlobalLSCost.Loads;
13624     // Move to the next pair.
13625     Second = nullptr;
13626   }
13627 }
13628
13629 /// Check the profitability of all involved LoadedSlice.
13630 /// Currently, it is considered profitable if there is exactly two
13631 /// involved slices (1) which are (2) next to each other in memory, and
13632 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13633 ///
13634 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13635 /// the elements themselves.
13636 ///
13637 /// FIXME: When the cost model will be mature enough, we can relax
13638 /// constraints (1) and (2).
13639 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13640                                 const APInt &UsedBits, bool ForCodeSize) {
13641   unsigned NumberOfSlices = LoadedSlices.size();
13642   if (StressLoadSlicing)
13643     return NumberOfSlices > 1;
13644
13645   // Check (1).
13646   if (NumberOfSlices != 2)
13647     return false;
13648
13649   // Check (2).
13650   if (!areUsedBitsDense(UsedBits))
13651     return false;
13652
13653   // Check (3).
13654   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13655   // The original code has one big load.
13656   OrigCost.Loads = 1;
13657   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13658     const LoadedSlice &LS = LoadedSlices[CurrSlice];
13659     // Accumulate the cost of all the slices.
13660     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13661     GlobalSlicingCost += SliceCost;
13662
13663     // Account as cost in the original configuration the gain obtained
13664     // with the current slices.
13665     OrigCost.addSliceGain(LS);
13666   }
13667
13668   // If the target supports paired load, adjust the cost accordingly.
13669   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13670   return OrigCost > GlobalSlicingCost;
13671 }
13672
13673 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13674 /// operations, split it in the various pieces being extracted.
13675 ///
13676 /// This sort of thing is introduced by SROA.
13677 /// This slicing takes care not to insert overlapping loads.
13678 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
13679 bool DAGCombiner::SliceUpLoad(SDNode *N) {
13680   if (Level < AfterLegalizeDAG)
13681     return false;
13682
13683   LoadSDNode *LD = cast<LoadSDNode>(N);
13684   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13685       !LD->getValueType(0).isInteger())
13686     return false;
13687
13688   // Keep track of already used bits to detect overlapping values.
13689   // In that case, we will just abort the transformation.
13690   APInt UsedBits(LD->getValueSizeInBits(0), 0);
13691
13692   SmallVector<LoadedSlice, 4> LoadedSlices;
13693
13694   // Check if this load is used as several smaller chunks of bits.
13695   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13696   // of computation for each trunc.
13697   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13698        UI != UIEnd; ++UI) {
13699     // Skip the uses of the chain.
13700     if (UI.getUse().getResNo() != 0)
13701       continue;
13702
13703     SDNode *User = *UI;
13704     unsigned Shift = 0;
13705
13706     // Check if this is a trunc(lshr).
13707     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13708         isa<ConstantSDNode>(User->getOperand(1))) {
13709       Shift = User->getConstantOperandVal(1);
13710       User = *User->use_begin();
13711     }
13712
13713     // At this point, User is a Truncate, iff we encountered, trunc or
13714     // trunc(lshr).
13715     if (User->getOpcode() != ISD::TRUNCATE)
13716       return false;
13717
13718     // The width of the type must be a power of 2 and greater than 8-bits.
13719     // Otherwise the load cannot be represented in LLVM IR.
13720     // Moreover, if we shifted with a non-8-bits multiple, the slice
13721     // will be across several bytes. We do not support that.
13722     unsigned Width = User->getValueSizeInBits(0);
13723     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13724       return false;
13725
13726     // Build the slice for this chain of computations.
13727     LoadedSlice LS(User, LD, Shift, &DAG);
13728     APInt CurrentUsedBits = LS.getUsedBits();
13729
13730     // Check if this slice overlaps with another.
13731     if ((CurrentUsedBits & UsedBits) != 0)
13732       return false;
13733     // Update the bits used globally.
13734     UsedBits |= CurrentUsedBits;
13735
13736     // Check if the new slice would be legal.
13737     if (!LS.isLegal())
13738       return false;
13739
13740     // Record the slice.
13741     LoadedSlices.push_back(LS);
13742   }
13743
13744   // Abort slicing if it does not seem to be profitable.
13745   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13746     return false;
13747
13748   ++SlicedLoads;
13749
13750   // Rewrite each chain to use an independent load.
13751   // By construction, each chain can be represented by a unique load.
13752
13753   // Prepare the argument for the new token factor for all the slices.
13754   SmallVector<SDValue, 8> ArgChains;
13755   for (SmallVectorImpl<LoadedSlice>::const_iterator
13756            LSIt = LoadedSlices.begin(),
13757            LSItEnd = LoadedSlices.end();
13758        LSIt != LSItEnd; ++LSIt) {
13759     SDValue SliceInst = LSIt->loadSlice();
13760     CombineTo(LSIt->Inst, SliceInst, true);
13761     if (SliceInst.getOpcode() != ISD::LOAD)
13762       SliceInst = SliceInst.getOperand(0);
13763     assert(SliceInst->getOpcode() == ISD::LOAD &&
13764            "It takes more than a zext to get to the loaded slice!!");
13765     ArgChains.push_back(SliceInst.getValue(1));
13766   }
13767
13768   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13769                               ArgChains);
13770   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13771   AddToWorklist(Chain.getNode());
13772   return true;
13773 }
13774
13775 /// Check to see if V is (and load (ptr), imm), where the load is having
13776 /// specific bytes cleared out.  If so, return the byte size being masked out
13777 /// and the shift amount.
13778 static std::pair<unsigned, unsigned>
13779 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13780   std::pair<unsigned, unsigned> Result(0, 0);
13781
13782   // Check for the structure we're looking for.
13783   if (V->getOpcode() != ISD::AND ||
13784       !isa<ConstantSDNode>(V->getOperand(1)) ||
13785       !ISD::isNormalLoad(V->getOperand(0).getNode()))
13786     return Result;
13787
13788   // Check the chain and pointer.
13789   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13790   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
13791
13792   // This only handles simple types.
13793   if (V.getValueType() != MVT::i16 &&
13794       V.getValueType() != MVT::i32 &&
13795       V.getValueType() != MVT::i64)
13796     return Result;
13797
13798   // Check the constant mask.  Invert it so that the bits being masked out are
13799   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
13800   // follow the sign bit for uniformity.
13801   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13802   unsigned NotMaskLZ = countLeadingZeros(NotMask);
13803   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
13804   unsigned NotMaskTZ = countTrailingZeros(NotMask);
13805   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
13806   if (NotMaskLZ == 64) return Result;  // All zero mask.
13807
13808   // See if we have a continuous run of bits.  If so, we have 0*1+0*
13809   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13810     return Result;
13811
13812   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13813   if (V.getValueType() != MVT::i64 && NotMaskLZ)
13814     NotMaskLZ -= 64-V.getValueSizeInBits();
13815
13816   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
13817   switch (MaskedBytes) {
13818   case 1:
13819   case 2:
13820   case 4: break;
13821   default: return Result; // All one mask, or 5-byte mask.
13822   }
13823
13824   // Verify that the first bit starts at a multiple of mask so that the access
13825   // is aligned the same as the access width.
13826   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
13827
13828   // For narrowing to be valid, it must be the case that the load the
13829   // immediately preceeding memory operation before the store.
13830   if (LD == Chain.getNode())
13831     ; // ok.
13832   else if (Chain->getOpcode() == ISD::TokenFactor &&
13833            SDValue(LD, 1).hasOneUse()) {
13834     // LD has only 1 chain use so they are no indirect dependencies.
13835     bool isOk = false;
13836     for (const SDValue &ChainOp : Chain->op_values())
13837       if (ChainOp.getNode() == LD) {
13838         isOk = true;
13839         break;
13840       }
13841     if (!isOk)
13842       return Result;
13843   } else
13844     return Result; // Fail.
13845
13846   Result.first = MaskedBytes;
13847   Result.second = NotMaskTZ/8;
13848   return Result;
13849 }
13850
13851 /// Check to see if IVal is something that provides a value as specified by
13852 /// MaskInfo. If so, replace the specified store with a narrower store of
13853 /// truncated IVal.
13854 static SDNode *
13855 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
13856                                 SDValue IVal, StoreSDNode *St,
13857                                 DAGCombiner *DC) {
13858   unsigned NumBytes = MaskInfo.first;
13859   unsigned ByteShift = MaskInfo.second;
13860   SelectionDAG &DAG = DC->getDAG();
13861
13862   // Check to see if IVal is all zeros in the part being masked in by the 'or'
13863   // that uses this.  If not, this is not a replacement.
13864   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
13865                                   ByteShift*8, (ByteShift+NumBytes)*8);
13866   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
13867
13868   // Check that it is legal on the target to do this.  It is legal if the new
13869   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
13870   // legalization.
13871   MVT VT = MVT::getIntegerVT(NumBytes*8);
13872   if (!DC->isTypeLegal(VT))
13873     return nullptr;
13874
13875   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
13876   // shifted by ByteShift and truncated down to NumBytes.
13877   if (ByteShift) {
13878     SDLoc DL(IVal);
13879     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
13880                        DAG.getConstant(ByteShift*8, DL,
13881                                     DC->getShiftAmountTy(IVal.getValueType())));
13882   }
13883
13884   // Figure out the offset for the store and the alignment of the access.
13885   unsigned StOffset;
13886   unsigned NewAlign = St->getAlignment();
13887
13888   if (DAG.getDataLayout().isLittleEndian())
13889     StOffset = ByteShift;
13890   else
13891     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
13892
13893   SDValue Ptr = St->getBasePtr();
13894   if (StOffset) {
13895     SDLoc DL(IVal);
13896     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
13897                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
13898     NewAlign = MinAlign(NewAlign, StOffset);
13899   }
13900
13901   // Truncate down to the new size.
13902   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
13903
13904   ++OpsNarrowed;
13905   return DAG
13906       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
13907                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
13908       .getNode();
13909 }
13910
13911 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
13912 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
13913 /// narrowing the load and store if it would end up being a win for performance
13914 /// or code size.
13915 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
13916   StoreSDNode *ST  = cast<StoreSDNode>(N);
13917   if (ST->isVolatile())
13918     return SDValue();
13919
13920   SDValue Chain = ST->getChain();
13921   SDValue Value = ST->getValue();
13922   SDValue Ptr   = ST->getBasePtr();
13923   EVT VT = Value.getValueType();
13924
13925   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
13926     return SDValue();
13927
13928   unsigned Opc = Value.getOpcode();
13929
13930   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
13931   // is a byte mask indicating a consecutive number of bytes, check to see if
13932   // Y is known to provide just those bytes.  If so, we try to replace the
13933   // load + replace + store sequence with a single (narrower) store, which makes
13934   // the load dead.
13935   if (Opc == ISD::OR) {
13936     std::pair<unsigned, unsigned> MaskedLoad;
13937     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
13938     if (MaskedLoad.first)
13939       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13940                                                   Value.getOperand(1), ST,this))
13941         return SDValue(NewST, 0);
13942
13943     // Or is commutative, so try swapping X and Y.
13944     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
13945     if (MaskedLoad.first)
13946       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13947                                                   Value.getOperand(0), ST,this))
13948         return SDValue(NewST, 0);
13949   }
13950
13951   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
13952       Value.getOperand(1).getOpcode() != ISD::Constant)
13953     return SDValue();
13954
13955   SDValue N0 = Value.getOperand(0);
13956   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13957       Chain == SDValue(N0.getNode(), 1)) {
13958     LoadSDNode *LD = cast<LoadSDNode>(N0);
13959     if (LD->getBasePtr() != Ptr ||
13960         LD->getPointerInfo().getAddrSpace() !=
13961         ST->getPointerInfo().getAddrSpace())
13962       return SDValue();
13963
13964     // Find the type to narrow it the load / op / store to.
13965     SDValue N1 = Value.getOperand(1);
13966     unsigned BitWidth = N1.getValueSizeInBits();
13967     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
13968     if (Opc == ISD::AND)
13969       Imm ^= APInt::getAllOnesValue(BitWidth);
13970     if (Imm == 0 || Imm.isAllOnesValue())
13971       return SDValue();
13972     unsigned ShAmt = Imm.countTrailingZeros();
13973     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
13974     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
13975     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13976     // The narrowing should be profitable, the load/store operation should be
13977     // legal (or custom) and the store size should be equal to the NewVT width.
13978     while (NewBW < BitWidth &&
13979            (NewVT.getStoreSizeInBits() != NewBW ||
13980             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
13981             !TLI.isNarrowingProfitable(VT, NewVT))) {
13982       NewBW = NextPowerOf2(NewBW);
13983       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13984     }
13985     if (NewBW >= BitWidth)
13986       return SDValue();
13987
13988     // If the lsb changed does not start at the type bitwidth boundary,
13989     // start at the previous one.
13990     if (ShAmt % NewBW)
13991       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
13992     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
13993                                    std::min(BitWidth, ShAmt + NewBW));
13994     if ((Imm & Mask) == Imm) {
13995       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
13996       if (Opc == ISD::AND)
13997         NewImm ^= APInt::getAllOnesValue(NewBW);
13998       uint64_t PtrOff = ShAmt / 8;
13999       // For big endian targets, we need to adjust the offset to the pointer to
14000       // load the correct bytes.
14001       if (DAG.getDataLayout().isBigEndian())
14002         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14003
14004       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14005       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14006       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14007         return SDValue();
14008
14009       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14010                                    Ptr.getValueType(), Ptr,
14011                                    DAG.getConstant(PtrOff, SDLoc(LD),
14012                                                    Ptr.getValueType()));
14013       SDValue NewLD =
14014           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14015                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14016                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
14017       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14018                                    DAG.getConstant(NewImm, SDLoc(Value),
14019                                                    NewVT));
14020       SDValue NewST =
14021           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14022                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14023
14024       AddToWorklist(NewPtr.getNode());
14025       AddToWorklist(NewLD.getNode());
14026       AddToWorklist(NewVal.getNode());
14027       WorklistRemover DeadNodes(*this);
14028       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14029       ++OpsNarrowed;
14030       return NewST;
14031     }
14032   }
14033
14034   return SDValue();
14035 }
14036
14037 /// For a given floating point load / store pair, if the load value isn't used
14038 /// by any other operations, then consider transforming the pair to integer
14039 /// load / store operations if the target deems the transformation profitable.
14040 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14041   StoreSDNode *ST  = cast<StoreSDNode>(N);
14042   SDValue Chain = ST->getChain();
14043   SDValue Value = ST->getValue();
14044   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14045       Value.hasOneUse() &&
14046       Chain == SDValue(Value.getNode(), 1)) {
14047     LoadSDNode *LD = cast<LoadSDNode>(Value);
14048     EVT VT = LD->getMemoryVT();
14049     if (!VT.isFloatingPoint() ||
14050         VT != ST->getMemoryVT() ||
14051         LD->isNonTemporal() ||
14052         ST->isNonTemporal() ||
14053         LD->getPointerInfo().getAddrSpace() != 0 ||
14054         ST->getPointerInfo().getAddrSpace() != 0)
14055       return SDValue();
14056
14057     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14058     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14059         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14060         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14061         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14062       return SDValue();
14063
14064     unsigned LDAlign = LD->getAlignment();
14065     unsigned STAlign = ST->getAlignment();
14066     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14067     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14068     if (LDAlign < ABIAlign || STAlign < ABIAlign)
14069       return SDValue();
14070
14071     SDValue NewLD =
14072         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14073                     LD->getPointerInfo(), LDAlign);
14074
14075     SDValue NewST =
14076         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14077                      ST->getPointerInfo(), STAlign);
14078
14079     AddToWorklist(NewLD.getNode());
14080     AddToWorklist(NewST.getNode());
14081     WorklistRemover DeadNodes(*this);
14082     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14083     ++LdStFP2Int;
14084     return NewST;
14085   }
14086
14087   return SDValue();
14088 }
14089
14090 // This is a helper function for visitMUL to check the profitability
14091 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14092 // MulNode is the original multiply, AddNode is (add x, c1),
14093 // and ConstNode is c2.
14094 //
14095 // If the (add x, c1) has multiple uses, we could increase
14096 // the number of adds if we make this transformation.
14097 // It would only be worth doing this if we can remove a
14098 // multiply in the process. Check for that here.
14099 // To illustrate:
14100 //     (A + c1) * c3
14101 //     (A + c2) * c3
14102 // We're checking for cases where we have common "c3 * A" expressions.
14103 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14104                                               SDValue &AddNode,
14105                                               SDValue &ConstNode) {
14106   APInt Val;
14107
14108   // If the add only has one use, this would be OK to do.
14109   if (AddNode.getNode()->hasOneUse())
14110     return true;
14111
14112   // Walk all the users of the constant with which we're multiplying.
14113   for (SDNode *Use : ConstNode->uses()) {
14114     if (Use == MulNode) // This use is the one we're on right now. Skip it.
14115       continue;
14116
14117     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14118       SDNode *OtherOp;
14119       SDNode *MulVar = AddNode.getOperand(0).getNode();
14120
14121       // OtherOp is what we're multiplying against the constant.
14122       if (Use->getOperand(0) == ConstNode)
14123         OtherOp = Use->getOperand(1).getNode();
14124       else
14125         OtherOp = Use->getOperand(0).getNode();
14126
14127       // Check to see if multiply is with the same operand of our "add".
14128       //
14129       //     ConstNode  = CONST
14130       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
14131       //     ...
14132       //     AddNode  = (A + c1)  <-- MulVar is A.
14133       //         = AddNode * ConstNode   <-- current visiting instruction.
14134       //
14135       // If we make this transformation, we will have a common
14136       // multiply (ConstNode * A) that we can save.
14137       if (OtherOp == MulVar)
14138         return true;
14139
14140       // Now check to see if a future expansion will give us a common
14141       // multiply.
14142       //
14143       //     ConstNode  = CONST
14144       //     AddNode    = (A + c1)
14145       //     ...   = AddNode * ConstNode <-- current visiting instruction.
14146       //     ...
14147       //     OtherOp = (A + c2)
14148       //     Use     = OtherOp * ConstNode <-- visiting Use.
14149       //
14150       // If we make this transformation, we will have a common
14151       // multiply (CONST * A) after we also do the same transformation
14152       // to the "t2" instruction.
14153       if (OtherOp->getOpcode() == ISD::ADD &&
14154           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14155           OtherOp->getOperand(0).getNode() == MulVar)
14156         return true;
14157     }
14158   }
14159
14160   // Didn't find a case where this would be profitable.
14161   return false;
14162 }
14163
14164 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14165                                          unsigned NumStores) {
14166   SmallVector<SDValue, 8> Chains;
14167   SmallPtrSet<const SDNode *, 8> Visited;
14168   SDLoc StoreDL(StoreNodes[0].MemNode);
14169
14170   for (unsigned i = 0; i < NumStores; ++i) {
14171     Visited.insert(StoreNodes[i].MemNode);
14172   }
14173
14174   // don't include nodes that are children
14175   for (unsigned i = 0; i < NumStores; ++i) {
14176     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
14177       Chains.push_back(StoreNodes[i].MemNode->getChain());
14178   }
14179
14180   assert(Chains.size() > 0 && "Chain should have generated a chain");
14181   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
14182 }
14183
14184 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14185     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14186     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14187   // Make sure we have something to merge.
14188   if (NumStores < 2)
14189     return false;
14190
14191   // The latest Node in the DAG.
14192   SDLoc DL(StoreNodes[0].MemNode);
14193
14194   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14195   unsigned SizeInBits = NumStores * ElementSizeBits;
14196   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14197
14198   EVT StoreTy;
14199   if (UseVector) {
14200     unsigned Elts = NumStores * NumMemElts;
14201     // Get the type for the merged vector store.
14202     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14203   } else
14204     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14205
14206   SDValue StoredVal;
14207   if (UseVector) {
14208     if (IsConstantSrc) {
14209       SmallVector<SDValue, 8> BuildVector;
14210       for (unsigned I = 0; I != NumStores; ++I) {
14211         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14212         SDValue Val = St->getValue();
14213         // If constant is of the wrong type, convert it now.
14214         if (MemVT != Val.getValueType()) {
14215           Val = peekThroughBitcasts(Val);
14216           // Deal with constants of wrong size.
14217           if (ElementSizeBits != Val.getValueSizeInBits()) {
14218             EVT IntMemVT =
14219                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14220             if (isa<ConstantFPSDNode>(Val)) {
14221               // Not clear how to truncate FP values.
14222               return false;
14223             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14224               Val = DAG.getConstant(C->getAPIntValue()
14225                                         .zextOrTrunc(Val.getValueSizeInBits())
14226                                         .zextOrTrunc(ElementSizeBits),
14227                                     SDLoc(C), IntMemVT);
14228           }
14229           // Make sure correctly size type is the correct type.
14230           Val = DAG.getBitcast(MemVT, Val);
14231         }
14232         BuildVector.push_back(Val);
14233       }
14234       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14235                                                : ISD::BUILD_VECTOR,
14236                               DL, StoreTy, BuildVector);
14237     } else {
14238       SmallVector<SDValue, 8> Ops;
14239       for (unsigned i = 0; i < NumStores; ++i) {
14240         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14241         SDValue Val = peekThroughBitcasts(St->getValue());
14242         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
14243         // type MemVT. If the underlying value is not the correct
14244         // type, but it is an extraction of an appropriate vector we
14245         // can recast Val to be of the correct type. This may require
14246         // converting between EXTRACT_VECTOR_ELT and
14247         // EXTRACT_SUBVECTOR.
14248         if ((MemVT != Val.getValueType()) &&
14249             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14250              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14251           EVT MemVTScalarTy = MemVT.getScalarType();
14252           // We may need to add a bitcast here to get types to line up.
14253           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14254             Val = DAG.getBitcast(MemVT, Val);
14255           } else {
14256             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14257                                             : ISD::EXTRACT_VECTOR_ELT;
14258             SDValue Vec = Val.getOperand(0);
14259             SDValue Idx = Val.getOperand(1);
14260             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14261           }
14262         }
14263         Ops.push_back(Val);
14264       }
14265
14266       // Build the extracted vector elements back into a vector.
14267       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14268                                                : ISD::BUILD_VECTOR,
14269                               DL, StoreTy, Ops);
14270     }
14271   } else {
14272     // We should always use a vector store when merging extracted vector
14273     // elements, so this path implies a store of constants.
14274     assert(IsConstantSrc && "Merged vector elements should use vector store");
14275
14276     APInt StoreInt(SizeInBits, 0);
14277
14278     // Construct a single integer constant which is made of the smaller
14279     // constant inputs.
14280     bool IsLE = DAG.getDataLayout().isLittleEndian();
14281     for (unsigned i = 0; i < NumStores; ++i) {
14282       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14283       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14284
14285       SDValue Val = St->getValue();
14286       Val = peekThroughBitcasts(Val);
14287       StoreInt <<= ElementSizeBits;
14288       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14289         StoreInt |= C->getAPIntValue()
14290                         .zextOrTrunc(ElementSizeBits)
14291                         .zextOrTrunc(SizeInBits);
14292       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14293         StoreInt |= C->getValueAPF()
14294                         .bitcastToAPInt()
14295                         .zextOrTrunc(ElementSizeBits)
14296                         .zextOrTrunc(SizeInBits);
14297         // If fp truncation is necessary give up for now.
14298         if (MemVT.getSizeInBits() != ElementSizeBits)
14299           return false;
14300       } else {
14301         llvm_unreachable("Invalid constant element type");
14302       }
14303     }
14304
14305     // Create the new Load and Store operations.
14306     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14307   }
14308
14309   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14310   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14311
14312   // make sure we use trunc store if it's necessary to be legal.
14313   SDValue NewStore;
14314   if (!UseTrunc) {
14315     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14316                             FirstInChain->getPointerInfo(),
14317                             FirstInChain->getAlignment());
14318   } else { // Must be realized as a trunc store
14319     EVT LegalizedStoredValTy =
14320         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14321     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14322     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14323     SDValue ExtendedStoreVal =
14324         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14325                         LegalizedStoredValTy);
14326     NewStore = DAG.getTruncStore(
14327         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14328         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14329         FirstInChain->getAlignment(),
14330         FirstInChain->getMemOperand()->getFlags());
14331   }
14332
14333   // Replace all merged stores with the new store.
14334   for (unsigned i = 0; i < NumStores; ++i)
14335     CombineTo(StoreNodes[i].MemNode, NewStore);
14336
14337   AddToWorklist(NewChain.getNode());
14338   return true;
14339 }
14340
14341 void DAGCombiner::getStoreMergeCandidates(
14342     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14343     SDNode *&RootNode) {
14344   // This holds the base pointer, index, and the offset in bytes from the base
14345   // pointer.
14346   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14347   EVT MemVT = St->getMemoryVT();
14348
14349   SDValue Val = peekThroughBitcasts(St->getValue());
14350   // We must have a base and an offset.
14351   if (!BasePtr.getBase().getNode())
14352     return;
14353
14354   // Do not handle stores to undef base pointers.
14355   if (BasePtr.getBase().isUndef())
14356     return;
14357
14358   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14359   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14360                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14361   bool IsLoadSrc = isa<LoadSDNode>(Val);
14362   BaseIndexOffset LBasePtr;
14363   // Match on loadbaseptr if relevant.
14364   EVT LoadVT;
14365   if (IsLoadSrc) {
14366     auto *Ld = cast<LoadSDNode>(Val);
14367     LBasePtr = BaseIndexOffset::match(Ld, DAG);
14368     LoadVT = Ld->getMemoryVT();
14369     // Load and store should be the same type.
14370     if (MemVT != LoadVT)
14371       return;
14372     // Loads must only have one use.
14373     if (!Ld->hasNUsesOfValue(1, 0))
14374       return;
14375     // The memory operands must not be volatile.
14376     if (Ld->isVolatile() || Ld->isIndexed())
14377       return;
14378   }
14379   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14380                             int64_t &Offset) -> bool {
14381     if (Other->isVolatile() || Other->isIndexed())
14382       return false;
14383     SDValue Val = peekThroughBitcasts(Other->getValue());
14384     // Allow merging constants of different types as integers.
14385     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14386                                            : Other->getMemoryVT() != MemVT;
14387     if (IsLoadSrc) {
14388       if (NoTypeMatch)
14389         return false;
14390       // The Load's Base Ptr must also match
14391       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14392         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14393         if (LoadVT != OtherLd->getMemoryVT())
14394           return false;
14395         // Loads must only have one use.
14396         if (!OtherLd->hasNUsesOfValue(1, 0))
14397           return false;
14398         // The memory operands must not be volatile.
14399         if (OtherLd->isVolatile() || OtherLd->isIndexed())
14400           return false;
14401         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14402           return false;
14403       } else
14404         return false;
14405     }
14406     if (IsConstantSrc) {
14407       if (NoTypeMatch)
14408         return false;
14409       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14410         return false;
14411     }
14412     if (IsExtractVecSrc) {
14413       // Do not merge truncated stores here.
14414       if (Other->isTruncatingStore())
14415         return false;
14416       if (!MemVT.bitsEq(Val.getValueType()))
14417         return false;
14418       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14419           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14420         return false;
14421     }
14422     Ptr = BaseIndexOffset::match(Other, DAG);
14423     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14424   };
14425
14426   // We looking for a root node which is an ancestor to all mergable
14427   // stores. We search up through a load, to our root and then down
14428   // through all children. For instance we will find Store{1,2,3} if
14429   // St is Store1, Store2. or Store3 where the root is not a load
14430   // which always true for nonvolatile ops. TODO: Expand
14431   // the search to find all valid candidates through multiple layers of loads.
14432   //
14433   // Root
14434   // |-------|-------|
14435   // Load    Load    Store3
14436   // |       |
14437   // Store1   Store2
14438   //
14439   // FIXME: We should be able to climb and
14440   // descend TokenFactors to find candidates as well.
14441
14442   RootNode = St->getChain().getNode();
14443
14444   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14445     RootNode = Ldn->getChain().getNode();
14446     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14447       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14448         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14449           if (I2.getOperandNo() == 0)
14450             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14451               BaseIndexOffset Ptr;
14452               int64_t PtrDiff;
14453               if (CandidateMatch(OtherST, Ptr, PtrDiff))
14454                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14455             }
14456   } else
14457     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14458       if (I.getOperandNo() == 0)
14459         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14460           BaseIndexOffset Ptr;
14461           int64_t PtrDiff;
14462           if (CandidateMatch(OtherST, Ptr, PtrDiff))
14463             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14464         }
14465 }
14466
14467 // We need to check that merging these stores does not cause a loop in
14468 // the DAG. Any store candidate may depend on another candidate
14469 // indirectly through its operand (we already consider dependencies
14470 // through the chain). Check in parallel by searching up from
14471 // non-chain operands of candidates.
14472 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14473     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14474     SDNode *RootNode) {
14475   // FIXME: We should be able to truncate a full search of
14476   // predecessors by doing a BFS and keeping tabs the originating
14477   // stores from which worklist nodes come from in a similar way to
14478   // TokenFactor simplfication.
14479
14480   SmallPtrSet<const SDNode *, 32> Visited;
14481   SmallVector<const SDNode *, 8> Worklist;
14482
14483   // RootNode is a predecessor to all candidates so we need not search
14484   // past it. Add RootNode (peeking through TokenFactors). Do not count
14485   // these towards size check.
14486
14487   Worklist.push_back(RootNode);
14488   while (!Worklist.empty()) {
14489     auto N = Worklist.pop_back_val();
14490     if (!Visited.insert(N).second)
14491       continue; // Already present in Visited.
14492     if (N->getOpcode() == ISD::TokenFactor) {
14493       for (SDValue Op : N->ops())
14494         Worklist.push_back(Op.getNode());
14495     }
14496   }
14497
14498   // Don't count pruning nodes towards max.
14499   unsigned int Max = 1024 + Visited.size();
14500   // Search Ops of store candidates.
14501   for (unsigned i = 0; i < NumStores; ++i) {
14502     SDNode *N = StoreNodes[i].MemNode;
14503     // Of the 4 Store Operands:
14504     //   * Chain (Op 0) -> We have already considered these
14505     //                    in candidate selection and can be
14506     //                    safely ignored
14507     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14508     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
14509     //                       but aren't necessarily fromt the same base node, so
14510     //                       cycles possible (e.g. via indexed store).
14511     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
14512     //               non-indexed stores). Not constant on all targets (e.g. ARM)
14513     //               and so can participate in a cycle.
14514     for (unsigned j = 1; j < N->getNumOperands(); ++j)
14515       Worklist.push_back(N->getOperand(j).getNode());
14516   }
14517   // Search through DAG. We can stop early if we find a store node.
14518   for (unsigned i = 0; i < NumStores; ++i)
14519     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14520                                      Max))
14521       return false;
14522   return true;
14523 }
14524
14525 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14526   if (OptLevel == CodeGenOpt::None)
14527     return false;
14528
14529   EVT MemVT = St->getMemoryVT();
14530   int64_t ElementSizeBytes = MemVT.getStoreSize();
14531   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14532
14533   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14534     return false;
14535
14536   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14537       Attribute::NoImplicitFloat);
14538
14539   // This function cannot currently deal with non-byte-sized memory sizes.
14540   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14541     return false;
14542
14543   if (!MemVT.isSimple())
14544     return false;
14545
14546   // Perform an early exit check. Do not bother looking at stored values that
14547   // are not constants, loads, or extracted vector elements.
14548   SDValue StoredVal = peekThroughBitcasts(St->getValue());
14549   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14550   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14551                        isa<ConstantFPSDNode>(StoredVal);
14552   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14553                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14554
14555   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14556     return false;
14557
14558   SmallVector<MemOpLink, 8> StoreNodes;
14559   SDNode *RootNode;
14560   // Find potential store merge candidates by searching through chain sub-DAG
14561   getStoreMergeCandidates(St, StoreNodes, RootNode);
14562
14563   // Check if there is anything to merge.
14564   if (StoreNodes.size() < 2)
14565     return false;
14566
14567   // Sort the memory operands according to their distance from the
14568   // base pointer.
14569   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14570     return LHS.OffsetFromBase < RHS.OffsetFromBase;
14571   });
14572
14573   // Store Merge attempts to merge the lowest stores. This generally
14574   // works out as if successful, as the remaining stores are checked
14575   // after the first collection of stores is merged. However, in the
14576   // case that a non-mergeable store is found first, e.g., {p[-2],
14577   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14578   // mergeable cases. To prevent this, we prune such stores from the
14579   // front of StoreNodes here.
14580
14581   bool RV = false;
14582   while (StoreNodes.size() > 1) {
14583     unsigned StartIdx = 0;
14584     while ((StartIdx + 1 < StoreNodes.size()) &&
14585            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14586                StoreNodes[StartIdx + 1].OffsetFromBase)
14587       ++StartIdx;
14588
14589     // Bail if we don't have enough candidates to merge.
14590     if (StartIdx + 1 >= StoreNodes.size())
14591       return RV;
14592
14593     if (StartIdx)
14594       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14595
14596     // Scan the memory operations on the chain and find the first
14597     // non-consecutive store memory address.
14598     unsigned NumConsecutiveStores = 1;
14599     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14600     // Check that the addresses are consecutive starting from the second
14601     // element in the list of stores.
14602     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14603       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14604       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14605         break;
14606       NumConsecutiveStores = i + 1;
14607     }
14608
14609     if (NumConsecutiveStores < 2) {
14610       StoreNodes.erase(StoreNodes.begin(),
14611                        StoreNodes.begin() + NumConsecutiveStores);
14612       continue;
14613     }
14614
14615     // The node with the lowest store address.
14616     LLVMContext &Context = *DAG.getContext();
14617     const DataLayout &DL = DAG.getDataLayout();
14618
14619     // Store the constants into memory as one consecutive store.
14620     if (IsConstantSrc) {
14621       while (NumConsecutiveStores >= 2) {
14622         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14623         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14624         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14625         unsigned LastLegalType = 1;
14626         unsigned LastLegalVectorType = 1;
14627         bool LastIntegerTrunc = false;
14628         bool NonZero = false;
14629         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14630         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14631           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14632           SDValue StoredVal = ST->getValue();
14633           bool IsElementZero = false;
14634           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14635             IsElementZero = C->isNullValue();
14636           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14637             IsElementZero = C->getConstantFPValue()->isNullValue();
14638           if (IsElementZero) {
14639             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14640               FirstZeroAfterNonZero = i;
14641           }
14642           NonZero |= !IsElementZero;
14643
14644           // Find a legal type for the constant store.
14645           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14646           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14647           bool IsFast = false;
14648
14649           // Break early when size is too large to be legal.
14650           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14651             break;
14652
14653           if (TLI.isTypeLegal(StoreTy) &&
14654               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14655               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14656                                      FirstStoreAlign, &IsFast) &&
14657               IsFast) {
14658             LastIntegerTrunc = false;
14659             LastLegalType = i + 1;
14660             // Or check whether a truncstore is legal.
14661           } else if (TLI.getTypeAction(Context, StoreTy) ==
14662                      TargetLowering::TypePromoteInteger) {
14663             EVT LegalizedStoredValTy =
14664                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14665             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14666                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14667                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14668                                        FirstStoreAlign, &IsFast) &&
14669                 IsFast) {
14670               LastIntegerTrunc = true;
14671               LastLegalType = i + 1;
14672             }
14673           }
14674
14675           // We only use vectors if the constant is known to be zero or the
14676           // target allows it and the function is not marked with the
14677           // noimplicitfloat attribute.
14678           if ((!NonZero ||
14679                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14680               !NoVectors) {
14681             // Find a legal type for the vector store.
14682             unsigned Elts = (i + 1) * NumMemElts;
14683             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14684             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14685                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14686                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14687                                        FirstStoreAlign, &IsFast) &&
14688                 IsFast)
14689               LastLegalVectorType = i + 1;
14690           }
14691         }
14692
14693         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14694         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14695
14696         // Check if we found a legal integer type that creates a meaningful
14697         // merge.
14698         if (NumElem < 2) {
14699           // We know that candidate stores are in order and of correct
14700           // shape. While there is no mergeable sequence from the
14701           // beginning one may start later in the sequence. The only
14702           // reason a merge of size N could have failed where another of
14703           // the same size would not have, is if the alignment has
14704           // improved or we've dropped a non-zero value. Drop as many
14705           // candidates as we can here.
14706           unsigned NumSkip = 1;
14707           while (
14708               (NumSkip < NumConsecutiveStores) &&
14709               (NumSkip < FirstZeroAfterNonZero) &&
14710               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14711             NumSkip++;
14712
14713           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14714           NumConsecutiveStores -= NumSkip;
14715           continue;
14716         }
14717
14718         // Check that we can merge these candidates without causing a cycle.
14719         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14720                                                       RootNode)) {
14721           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14722           NumConsecutiveStores -= NumElem;
14723           continue;
14724         }
14725
14726         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14727                                               UseVector, LastIntegerTrunc);
14728
14729         // Remove merged stores for next iteration.
14730         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14731         NumConsecutiveStores -= NumElem;
14732       }
14733       continue;
14734     }
14735
14736     // When extracting multiple vector elements, try to store them
14737     // in one vector store rather than a sequence of scalar stores.
14738     if (IsExtractVecSrc) {
14739       // Loop on Consecutive Stores on success.
14740       while (NumConsecutiveStores >= 2) {
14741         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14742         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14743         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14744         unsigned NumStoresToMerge = 1;
14745         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14746           // Find a legal type for the vector store.
14747           unsigned Elts = (i + 1) * NumMemElts;
14748           EVT Ty =
14749               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14750           bool IsFast;
14751
14752           // Break early when size is too large to be legal.
14753           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14754             break;
14755
14756           if (TLI.isTypeLegal(Ty) &&
14757               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14758               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14759                                      FirstStoreAlign, &IsFast) &&
14760               IsFast)
14761             NumStoresToMerge = i + 1;
14762         }
14763
14764         // Check if we found a legal integer type creating a meaningful
14765         // merge.
14766         if (NumStoresToMerge < 2) {
14767           // We know that candidate stores are in order and of correct
14768           // shape. While there is no mergeable sequence from the
14769           // beginning one may start later in the sequence. The only
14770           // reason a merge of size N could have failed where another of
14771           // the same size would not have, is if the alignment has
14772           // improved. Drop as many candidates as we can here.
14773           unsigned NumSkip = 1;
14774           while (
14775               (NumSkip < NumConsecutiveStores) &&
14776               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14777             NumSkip++;
14778
14779           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14780           NumConsecutiveStores -= NumSkip;
14781           continue;
14782         }
14783
14784         // Check that we can merge these candidates without causing a cycle.
14785         if (!checkMergeStoreCandidatesForDependencies(
14786                 StoreNodes, NumStoresToMerge, RootNode)) {
14787           StoreNodes.erase(StoreNodes.begin(),
14788                            StoreNodes.begin() + NumStoresToMerge);
14789           NumConsecutiveStores -= NumStoresToMerge;
14790           continue;
14791         }
14792
14793         RV |= MergeStoresOfConstantsOrVecElts(
14794             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14795
14796         StoreNodes.erase(StoreNodes.begin(),
14797                          StoreNodes.begin() + NumStoresToMerge);
14798         NumConsecutiveStores -= NumStoresToMerge;
14799       }
14800       continue;
14801     }
14802
14803     // Below we handle the case of multiple consecutive stores that
14804     // come from multiple consecutive loads. We merge them into a single
14805     // wide load and a single wide store.
14806
14807     // Look for load nodes which are used by the stored values.
14808     SmallVector<MemOpLink, 8> LoadNodes;
14809
14810     // Find acceptable loads. Loads need to have the same chain (token factor),
14811     // must not be zext, volatile, indexed, and they must be consecutive.
14812     BaseIndexOffset LdBasePtr;
14813
14814     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14815       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14816       SDValue Val = peekThroughBitcasts(St->getValue());
14817       LoadSDNode *Ld = cast<LoadSDNode>(Val);
14818
14819       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14820       // If this is not the first ptr that we check.
14821       int64_t LdOffset = 0;
14822       if (LdBasePtr.getBase().getNode()) {
14823         // The base ptr must be the same.
14824         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14825           break;
14826       } else {
14827         // Check that all other base pointers are the same as this one.
14828         LdBasePtr = LdPtr;
14829       }
14830
14831       // We found a potential memory operand to merge.
14832       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14833     }
14834
14835     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14836       // If we have load/store pair instructions and we only have two values,
14837       // don't bother merging.
14838       unsigned RequiredAlignment;
14839       if (LoadNodes.size() == 2 &&
14840           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14841           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14842         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14843         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14844         break;
14845       }
14846       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14847       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14848       unsigned FirstStoreAlign = FirstInChain->getAlignment();
14849       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14850       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14851       unsigned FirstLoadAlign = FirstLoad->getAlignment();
14852
14853       // Scan the memory operations on the chain and find the first
14854       // non-consecutive load memory address. These variables hold the index in
14855       // the store node array.
14856
14857       unsigned LastConsecutiveLoad = 1;
14858
14859       // This variable refers to the size and not index in the array.
14860       unsigned LastLegalVectorType = 1;
14861       unsigned LastLegalIntegerType = 1;
14862       bool isDereferenceable = true;
14863       bool DoIntegerTruncate = false;
14864       StartAddress = LoadNodes[0].OffsetFromBase;
14865       SDValue FirstChain = FirstLoad->getChain();
14866       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14867         // All loads must share the same chain.
14868         if (LoadNodes[i].MemNode->getChain() != FirstChain)
14869           break;
14870
14871         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14872         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14873           break;
14874         LastConsecutiveLoad = i;
14875
14876         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14877           isDereferenceable = false;
14878
14879         // Find a legal type for the vector store.
14880         unsigned Elts = (i + 1) * NumMemElts;
14881         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14882
14883         // Break early when size is too large to be legal.
14884         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14885           break;
14886
14887         bool IsFastSt, IsFastLd;
14888         if (TLI.isTypeLegal(StoreTy) &&
14889             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14890             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14891                                    FirstStoreAlign, &IsFastSt) &&
14892             IsFastSt &&
14893             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14894                                    FirstLoadAlign, &IsFastLd) &&
14895             IsFastLd) {
14896           LastLegalVectorType = i + 1;
14897         }
14898
14899         // Find a legal type for the integer store.
14900         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14901         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14902         if (TLI.isTypeLegal(StoreTy) &&
14903             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14904             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14905                                    FirstStoreAlign, &IsFastSt) &&
14906             IsFastSt &&
14907             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14908                                    FirstLoadAlign, &IsFastLd) &&
14909             IsFastLd) {
14910           LastLegalIntegerType = i + 1;
14911           DoIntegerTruncate = false;
14912           // Or check whether a truncstore and extload is legal.
14913         } else if (TLI.getTypeAction(Context, StoreTy) ==
14914                    TargetLowering::TypePromoteInteger) {
14915           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14916           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14917               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14918               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14919                                  StoreTy) &&
14920               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14921                                  StoreTy) &&
14922               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14923               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14924                                      FirstStoreAlign, &IsFastSt) &&
14925               IsFastSt &&
14926               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14927                                      FirstLoadAlign, &IsFastLd) &&
14928               IsFastLd) {
14929             LastLegalIntegerType = i + 1;
14930             DoIntegerTruncate = true;
14931           }
14932         }
14933       }
14934
14935       // Only use vector types if the vector type is larger than the integer
14936       // type. If they are the same, use integers.
14937       bool UseVectorTy =
14938           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14939       unsigned LastLegalType =
14940           std::max(LastLegalVectorType, LastLegalIntegerType);
14941
14942       // We add +1 here because the LastXXX variables refer to location while
14943       // the NumElem refers to array/index size.
14944       unsigned NumElem =
14945           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14946       NumElem = std::min(LastLegalType, NumElem);
14947
14948       if (NumElem < 2) {
14949         // We know that candidate stores are in order and of correct
14950         // shape. While there is no mergeable sequence from the
14951         // beginning one may start later in the sequence. The only
14952         // reason a merge of size N could have failed where another of
14953         // the same size would not have is if the alignment or either
14954         // the load or store has improved. Drop as many candidates as we
14955         // can here.
14956         unsigned NumSkip = 1;
14957         while ((NumSkip < LoadNodes.size()) &&
14958                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14959                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14960           NumSkip++;
14961         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14962         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14963         NumConsecutiveStores -= NumSkip;
14964         continue;
14965       }
14966
14967       // Check that we can merge these candidates without causing a cycle.
14968       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14969                                                     RootNode)) {
14970         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14971         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14972         NumConsecutiveStores -= NumElem;
14973         continue;
14974       }
14975
14976       // Find if it is better to use vectors or integers to load and store
14977       // to memory.
14978       EVT JointMemOpVT;
14979       if (UseVectorTy) {
14980         // Find a legal type for the vector store.
14981         unsigned Elts = NumElem * NumMemElts;
14982         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14983       } else {
14984         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14985         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14986       }
14987
14988       SDLoc LoadDL(LoadNodes[0].MemNode);
14989       SDLoc StoreDL(StoreNodes[0].MemNode);
14990
14991       // The merged loads are required to have the same incoming chain, so
14992       // using the first's chain is acceptable.
14993
14994       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14995       AddToWorklist(NewStoreChain.getNode());
14996
14997       MachineMemOperand::Flags MMOFlags =
14998           isDereferenceable ? MachineMemOperand::MODereferenceable
14999                             : MachineMemOperand::MONone;
15000
15001       SDValue NewLoad, NewStore;
15002       if (UseVectorTy || !DoIntegerTruncate) {
15003         NewLoad =
15004             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15005                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15006                         FirstLoadAlign, MMOFlags);
15007         NewStore = DAG.getStore(
15008             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15009             FirstInChain->getPointerInfo(), FirstStoreAlign);
15010       } else { // This must be the truncstore/extload case
15011         EVT ExtendedTy =
15012             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15013         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15014                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15015                                  FirstLoad->getPointerInfo(), JointMemOpVT,
15016                                  FirstLoadAlign, MMOFlags);
15017         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15018                                      FirstInChain->getBasePtr(),
15019                                      FirstInChain->getPointerInfo(),
15020                                      JointMemOpVT, FirstInChain->getAlignment(),
15021                                      FirstInChain->getMemOperand()->getFlags());
15022       }
15023
15024       // Transfer chain users from old loads to the new load.
15025       for (unsigned i = 0; i < NumElem; ++i) {
15026         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15027         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15028                                       SDValue(NewLoad.getNode(), 1));
15029       }
15030
15031       // Replace the all stores with the new store. Recursively remove
15032       // corresponding value if its no longer used.
15033       for (unsigned i = 0; i < NumElem; ++i) {
15034         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15035         CombineTo(StoreNodes[i].MemNode, NewStore);
15036         if (Val.getNode()->use_empty())
15037           recursivelyDeleteUnusedNodes(Val.getNode());
15038       }
15039
15040       RV = true;
15041       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15042       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15043       NumConsecutiveStores -= NumElem;
15044     }
15045   }
15046   return RV;
15047 }
15048
15049 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15050   SDLoc SL(ST);
15051   SDValue ReplStore;
15052
15053   // Replace the chain to avoid dependency.
15054   if (ST->isTruncatingStore()) {
15055     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15056                                   ST->getBasePtr(), ST->getMemoryVT(),
15057                                   ST->getMemOperand());
15058   } else {
15059     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15060                              ST->getMemOperand());
15061   }
15062
15063   // Create token to keep both nodes around.
15064   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15065                               MVT::Other, ST->getChain(), ReplStore);
15066
15067   // Make sure the new and old chains are cleaned up.
15068   AddToWorklist(Token.getNode());
15069
15070   // Don't add users to work list.
15071   return CombineTo(ST, Token, false);
15072 }
15073
15074 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15075   SDValue Value = ST->getValue();
15076   if (Value.getOpcode() == ISD::TargetConstantFP)
15077     return SDValue();
15078
15079   SDLoc DL(ST);
15080
15081   SDValue Chain = ST->getChain();
15082   SDValue Ptr = ST->getBasePtr();
15083
15084   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15085
15086   // NOTE: If the original store is volatile, this transform must not increase
15087   // the number of stores.  For example, on x86-32 an f64 can be stored in one
15088   // processor operation but an i64 (which is not legal) requires two.  So the
15089   // transform should not be done in this case.
15090
15091   SDValue Tmp;
15092   switch (CFP->getSimpleValueType(0).SimpleTy) {
15093   default:
15094     llvm_unreachable("Unknown FP type");
15095   case MVT::f16:    // We don't do this for these yet.
15096   case MVT::f80:
15097   case MVT::f128:
15098   case MVT::ppcf128:
15099     return SDValue();
15100   case MVT::f32:
15101     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15102         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15103       ;
15104       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15105                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15106                             MVT::i32);
15107       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15108     }
15109
15110     return SDValue();
15111   case MVT::f64:
15112     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15113          !ST->isVolatile()) ||
15114         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15115       ;
15116       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15117                             getZExtValue(), SDLoc(CFP), MVT::i64);
15118       return DAG.getStore(Chain, DL, Tmp,
15119                           Ptr, ST->getMemOperand());
15120     }
15121
15122     if (!ST->isVolatile() &&
15123         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15124       // Many FP stores are not made apparent until after legalize, e.g. for
15125       // argument passing.  Since this is so common, custom legalize the
15126       // 64-bit integer store into two 32-bit stores.
15127       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15128       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15129       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15130       if (DAG.getDataLayout().isBigEndian())
15131         std::swap(Lo, Hi);
15132
15133       unsigned Alignment = ST->getAlignment();
15134       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15135       AAMDNodes AAInfo = ST->getAAInfo();
15136
15137       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15138                                  ST->getAlignment(), MMOFlags, AAInfo);
15139       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15140                         DAG.getConstant(4, DL, Ptr.getValueType()));
15141       Alignment = MinAlign(Alignment, 4U);
15142       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15143                                  ST->getPointerInfo().getWithOffset(4),
15144                                  Alignment, MMOFlags, AAInfo);
15145       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15146                          St0, St1);
15147     }
15148
15149     return SDValue();
15150   }
15151 }
15152
15153 SDValue DAGCombiner::visitSTORE(SDNode *N) {
15154   StoreSDNode *ST  = cast<StoreSDNode>(N);
15155   SDValue Chain = ST->getChain();
15156   SDValue Value = ST->getValue();
15157   SDValue Ptr   = ST->getBasePtr();
15158
15159   // If this is a store of a bit convert, store the input value if the
15160   // resultant store does not need a higher alignment than the original.
15161   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15162       ST->isUnindexed()) {
15163     EVT SVT = Value.getOperand(0).getValueType();
15164     // If the store is volatile, we only want to change the store type if the
15165     // resulting store is legal. Otherwise we might increase the number of
15166     // memory accesses. We don't care if the original type was legal or not
15167     // as we assume software couldn't rely on the number of accesses of an
15168     // illegal type.
15169     if (((!LegalOperations && !ST->isVolatile()) ||
15170          TLI.isOperationLegal(ISD::STORE, SVT)) &&
15171         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
15172       unsigned OrigAlign = ST->getAlignment();
15173       bool Fast = false;
15174       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
15175                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
15176           Fast) {
15177         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
15178                             ST->getPointerInfo(), OrigAlign,
15179                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
15180       }
15181     }
15182   }
15183
15184   // Turn 'store undef, Ptr' -> nothing.
15185   if (Value.isUndef() && ST->isUnindexed())
15186     return Chain;
15187
15188   // Try to infer better alignment information than the store already has.
15189   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
15190     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
15191       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
15192         SDValue NewStore =
15193             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
15194                               ST->getMemoryVT(), Align,
15195                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
15196         // NewStore will always be N as we are only refining the alignment
15197         assert(NewStore.getNode() == N);
15198         (void)NewStore;
15199       }
15200     }
15201   }
15202
15203   // Try transforming a pair floating point load / store ops to integer
15204   // load / store ops.
15205   if (SDValue NewST = TransformFPLoadStorePair(N))
15206     return NewST;
15207
15208   if (ST->isUnindexed()) {
15209     // Walk up chain skipping non-aliasing memory nodes, on this store and any
15210     // adjacent stores.
15211     if (findBetterNeighborChains(ST)) {
15212       // replaceStoreChain uses CombineTo, which handled all of the worklist
15213       // manipulation. Return the original node to not do anything else.
15214       return SDValue(ST, 0);
15215     }
15216     Chain = ST->getChain();
15217   }
15218
15219   // FIXME: is there such a thing as a truncating indexed store?
15220   if (ST->isTruncatingStore() && ST->isUnindexed() &&
15221       Value.getValueType().isInteger() &&
15222       (!isa<ConstantSDNode>(Value) ||
15223        !cast<ConstantSDNode>(Value)->isOpaque())) {
15224     // See if we can simplify the input to this truncstore with knowledge that
15225     // only the low bits are being used.  For example:
15226     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
15227     SDValue Shorter = DAG.GetDemandedBits(
15228         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15229                                     ST->getMemoryVT().getScalarSizeInBits()));
15230     AddToWorklist(Value.getNode());
15231     if (Shorter.getNode())
15232       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
15233                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
15234
15235     // Otherwise, see if we can simplify the operation with
15236     // SimplifyDemandedBits, which only works if the value has a single use.
15237     if (SimplifyDemandedBits(
15238             Value,
15239             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15240                                  ST->getMemoryVT().getScalarSizeInBits()))) {
15241       // Re-visit the store if anything changed and the store hasn't been merged
15242       // with another node (N is deleted) SimplifyDemandedBits will add Value's
15243       // node back to the worklist if necessary, but we also need to re-visit
15244       // the Store node itself.
15245       if (N->getOpcode() != ISD::DELETED_NODE)
15246         AddToWorklist(N);
15247       return SDValue(N, 0);
15248     }
15249   }
15250
15251   // If this is a load followed by a store to the same location, then the store
15252   // is dead/noop.
15253   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15254     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15255         ST->isUnindexed() && !ST->isVolatile() &&
15256         // There can't be any side effects between the load and store, such as
15257         // a call or store.
15258         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15259       // The store is dead, remove it.
15260       return Chain;
15261     }
15262   }
15263
15264   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15265     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15266         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
15267         ST->getMemoryVT() == ST1->getMemoryVT()) {
15268       // If this is a store followed by a store with the same value to the same
15269       // location, then the store is dead/noop.
15270       if (ST1->getValue() == Value) {
15271         // The store is dead, remove it.
15272         return Chain;
15273       }
15274
15275       // If the store preceding this one writes to the same location and no
15276       // other node is chained to that preceding store, we can effectively
15277       // drop it. Do not remove stores to undef as they may be used as
15278       // data sinks.
15279       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15280           !ST1->getBasePtr().isUndef()) {
15281         // ST1 is fully overwritten and can be elided. Combine with its chain
15282         // value.
15283         CombineTo(ST1, ST1->getChain());
15284         return SDValue();
15285       }
15286     }
15287   }
15288
15289   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15290   // truncating store.  We can do this even if this is already a truncstore.
15291   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15292       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15293       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15294                             ST->getMemoryVT())) {
15295     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15296                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
15297   }
15298
15299   // Always perform this optimization before types are legal. If the target
15300   // prefers, also try this after legalization to catch stores that were created
15301   // by intrinsics or other nodes.
15302   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15303     while (true) {
15304       // There can be multiple store sequences on the same chain.
15305       // Keep trying to merge store sequences until we are unable to do so
15306       // or until we merge the last store on the chain.
15307       bool Changed = MergeConsecutiveStores(ST);
15308       if (!Changed) break;
15309       // Return N as merge only uses CombineTo and no worklist clean
15310       // up is necessary.
15311       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15312         return SDValue(N, 0);
15313     }
15314   }
15315
15316   // Try transforming N to an indexed store.
15317   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15318     return SDValue(N, 0);
15319
15320   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15321   //
15322   // Make sure to do this only after attempting to merge stores in order to
15323   //  avoid changing the types of some subset of stores due to visit order,
15324   //  preventing their merging.
15325   if (isa<ConstantFPSDNode>(ST->getValue())) {
15326     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15327       return NewSt;
15328   }
15329
15330   if (SDValue NewSt = splitMergedValStore(ST))
15331     return NewSt;
15332
15333   return ReduceLoadOpStoreWidth(N);
15334 }
15335
15336 /// For the instruction sequence of store below, F and I values
15337 /// are bundled together as an i64 value before being stored into memory.
15338 /// Sometimes it is more efficent to generate separate stores for F and I,
15339 /// which can remove the bitwise instructions or sink them to colder places.
15340 ///
15341 ///   (store (or (zext (bitcast F to i32) to i64),
15342 ///              (shl (zext I to i64), 32)), addr)  -->
15343 ///   (store F, addr) and (store I, addr+4)
15344 ///
15345 /// Similarly, splitting for other merged store can also be beneficial, like:
15346 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15347 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15348 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15349 /// For pair of {i16, i8},  i32 store --> two i16 stores.
15350 /// For pair of {i8, i8},   i16 store --> two i8 stores.
15351 ///
15352 /// We allow each target to determine specifically which kind of splitting is
15353 /// supported.
15354 ///
15355 /// The store patterns are commonly seen from the simple code snippet below
15356 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
15357 ///   void goo(const std::pair<int, float> &);
15358 ///   hoo() {
15359 ///     ...
15360 ///     goo(std::make_pair(tmp, ftmp));
15361 ///     ...
15362 ///   }
15363 ///
15364 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15365   if (OptLevel == CodeGenOpt::None)
15366     return SDValue();
15367
15368   SDValue Val = ST->getValue();
15369   SDLoc DL(ST);
15370
15371   // Match OR operand.
15372   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15373     return SDValue();
15374
15375   // Match SHL operand and get Lower and Higher parts of Val.
15376   SDValue Op1 = Val.getOperand(0);
15377   SDValue Op2 = Val.getOperand(1);
15378   SDValue Lo, Hi;
15379   if (Op1.getOpcode() != ISD::SHL) {
15380     std::swap(Op1, Op2);
15381     if (Op1.getOpcode() != ISD::SHL)
15382       return SDValue();
15383   }
15384   Lo = Op2;
15385   Hi = Op1.getOperand(0);
15386   if (!Op1.hasOneUse())
15387     return SDValue();
15388
15389   // Match shift amount to HalfValBitSize.
15390   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15391   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15392   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15393     return SDValue();
15394
15395   // Lo and Hi are zero-extended from int with size less equal than 32
15396   // to i64.
15397   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15398       !Lo.getOperand(0).getValueType().isScalarInteger() ||
15399       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15400       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15401       !Hi.getOperand(0).getValueType().isScalarInteger() ||
15402       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15403     return SDValue();
15404
15405   // Use the EVT of low and high parts before bitcast as the input
15406   // of target query.
15407   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15408                   ? Lo.getOperand(0).getValueType()
15409                   : Lo.getValueType();
15410   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15411                    ? Hi.getOperand(0).getValueType()
15412                    : Hi.getValueType();
15413   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15414     return SDValue();
15415
15416   // Start to split store.
15417   unsigned Alignment = ST->getAlignment();
15418   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15419   AAMDNodes AAInfo = ST->getAAInfo();
15420
15421   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15422   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15423   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15424   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15425
15426   SDValue Chain = ST->getChain();
15427   SDValue Ptr = ST->getBasePtr();
15428   // Lower value store.
15429   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15430                              ST->getAlignment(), MMOFlags, AAInfo);
15431   Ptr =
15432       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15433                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15434   // Higher value store.
15435   SDValue St1 =
15436       DAG.getStore(St0, DL, Hi, Ptr,
15437                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15438                    Alignment / 2, MMOFlags, AAInfo);
15439   return St1;
15440 }
15441
15442 /// Convert a disguised subvector insertion into a shuffle:
15443 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15444 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15445 /// Note: We do not use an insert_subvector node because that requires a legal
15446 /// subvector type.
      ///
      /// \param N the insert_vector_elt node; operand 0 is the destination vector
      /// and operand 1 is the inserted value.
      /// \param InsIndex the element index of the destination vector that is
      /// being written.
      /// \returns the replacement value (a bitcast of the new shuffle back to the
      /// destination vector type), or a null SDValue if the inserted value is not
      /// a single-use bitcast from a vector or the target rejects the shuffle
      /// mask.
15447 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15448   SDValue InsertVal = N->getOperand(1);
15449   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15450       !InsertVal.getOperand(0).getValueType().isVector())
15451     return SDValue();
15452
15453   SDValue SubVec = InsertVal.getOperand(0);
15454   SDValue DestVec = N->getOperand(0);
15455   EVT SubVecVT = SubVec.getValueType();
15456   EVT VT = DestVec.getValueType();
        // ExtendRatio is how many sub-vectors fit in the destination vector by
        // bit size; NumMaskVals is the destination's lane count when it is
        // re-expressed in units of the sub-vector's element type.
15457   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15458   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15459   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15460
15461   // Step 1: Create a shuffle mask that implements this insert operation. The
15462   // vector that we are inserting into will be operand 0 of the shuffle, so
15463   // those elements are just 'i'. The inserted subvector is in the first
15464   // positions of operand 1 of the shuffle. Example:
15465   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15466   SmallVector<int, 16> Mask(NumMaskVals);
15467   for (unsigned i = 0; i != NumMaskVals; ++i) {
          // Lanes that fall inside the replaced destination element select from
          // operand 1 (mask values >= NumMaskVals); every other lane keeps the
          // corresponding lane of operand 0.
15468     if (i / NumSrcElts == InsIndex)
15469       Mask[i] = (i % NumSrcElts) + NumMaskVals;
15470     else
15471       Mask[i] = i;
15472   }
15473
15474   // Bail out if the target can not handle the shuffle we want to create.
15475   EVT SubVecEltVT = SubVecVT.getVectorElementType();
15476   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15477   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15478     return SDValue();
15479
15480   // Step 2: Create a wide vector from the inserted source vector by appending
15481   // undefined elements. This is the same size as our destination vector.
15482   SDLoc DL(N);
15483   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15484   ConcatOps[0] = SubVec;
15485   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15486
15487   // Step 3: Shuffle in the padded subvector.
15488   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15489   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
        // Put every intermediate node created above on the combiner worklist.
15490   AddToWorklist(PaddedSubV.getNode());
15491   AddToWorklist(DestVecBC.getNode());
15492   AddToWorklist(Shuf.getNode());
15493   return DAG.getBitcast(VT, Shuf);
15494 }
15495
      /// Combine an INSERT_VECTOR_ELT node.
      ///
      /// Operand 0 is the input vector, operand 1 the inserted value and operand
      /// 2 the element index. The folds are tried in this order: inserting
      /// undef, re-inserting an element that was extracted from the same vector
      /// at the same index, splatting a variable-index insert into an undef
      /// vector, turning an insert of a bitcast-from-vector into a shuffle,
      /// reordering nested constant-index inserts, and finally folding the
      /// insert into a BUILD_VECTOR.
      ///
      /// \returns the replacement value, or a null SDValue if no fold applies.
15496 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15497   SDValue InVec = N->getOperand(0);
15498   SDValue InVal = N->getOperand(1);
15499   SDValue EltNo = N->getOperand(2);
15500   SDLoc DL(N);
15501
15502   // If the inserted element is an UNDEF, just use the input vector.
15503   if (InVal.isUndef())
15504     return InVec;
15505
15506   EVT VT = InVec.getValueType();
15507   unsigned NumElts = VT.getVectorNumElements();
15508
15509   // Remove redundant insertions:
15510   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
15511   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15512       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
15513     return InVec;
15514
15515   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15516   if (!IndexC) {
15517     // If this is variable insert to undef vector, it might be better to splat:
15518     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
15519     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
15520       SmallVector<SDValue, 8> Ops(NumElts, InVal);
15521       return DAG.getBuildVector(VT, DL, Ops);
15522     }
          // No other fold below handles a variable index.
15523     return SDValue();
15524   }
15525
15526   // We must know which element is being inserted for folds below here.
15527   unsigned Elt = IndexC->getZExtValue();
15528   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
15529     return Shuf;
15530
15531   // Canonicalize insert_vector_elt dag nodes.
15532   // Example:
15533   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
15534   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15535   //
15536   // Do this only if the child insert_vector node has one use; also
15537   // do this only if indices are both constants and Idx1 < Idx0.
15538   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15539       && isa<ConstantSDNode>(InVec.getOperand(2))) {
15540     unsigned OtherElt = InVec.getConstantOperandVal(2);
15541     if (Elt < OtherElt) {
15542       // Swap nodes.
15543       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15544                                   InVec.getOperand(0), InVal, EltNo);
15545       AddToWorklist(NewOp.getNode());
15546       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15547                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15548     }
15549   }
15550
15551   // If we can't generate a legal BUILD_VECTOR, exit
15552   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15553     return SDValue();
15554
15555   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15556   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
15557   // vector elements.
15558   SmallVector<SDValue, 8> Ops;
15559   // Do not combine these two vectors if the output vector will not replace
15560   // the input vector.
15561   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15562     Ops.append(InVec.getNode()->op_begin(),
15563                InVec.getNode()->op_end());
15564   } else if (InVec.isUndef()) {
          // Start from NumElts undef operands typed like the inserted value.
15565     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
15566   } else {
15567     return SDValue();
15568   }
15569   assert(Ops.size() == NumElts && "Unexpected vector size");
15570
15571   // Insert the element. An index at or beyond Ops.size() leaves Ops
        // untouched, so the result is then just the rebuilt input vector.
15572   if (Elt < Ops.size()) {
15573     // All the operands of BUILD_VECTOR must have the same type;
15574     // we enforce that here.
15575     EVT OpVT = Ops[0].getValueType();
15576     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15577   }
15578
15579   // Return the new vector
15580   return DAG.getBuildVector(VT, DL, Ops);
15581 }
15582
15583 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
15584                                                   SDValue EltNo,
15585                                                   LoadSDNode *OriginalLoad) {
15586   assert(!OriginalLoad->isVolatile());
15587
15588   EVT ResultVT = EVE->getValueType(0);
15589   EVT VecEltVT = InVecVT.getVectorElementType();
15590   unsigned Align = OriginalLoad->getAlignment();
15591   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15592       VecEltVT.getTypeForEVT(*DAG.getContext()));
15593
15594   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15595     return SDValue();
15596
15597   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15598     ISD::NON_EXTLOAD : ISD::EXTLOAD;
15599   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15600     return SDValue();
15601
15602   Align = NewAlign;
15603
15604   SDValue NewPtr = OriginalLoad->getBasePtr();
15605   SDValue Offset;
15606   EVT PtrType = NewPtr.getValueType();
15607   MachinePointerInfo MPI;
15608   SDLoc DL(EVE);
15609   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15610     int Elt = ConstEltNo->getZExtValue();
15611     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15612     Offset = DAG.getConstant(PtrOff, DL, PtrType);
15613     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15614   } else {
15615     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15616     Offset = DAG.getNode(
15617         ISD::MUL, DL, PtrType, Offset,
15618         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15619     MPI = OriginalLoad->getPointerInfo();
15620   }
15621   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15622
15623   // The replacement we need to do here is a little tricky: we need to
15624   // replace an extractelement of a load with a load.
15625   // Use ReplaceAllUsesOfValuesWith to do the replacement.
15626   // Note that this replacement assumes that the extractvalue is the only
15627   // use of the load; that's okay because we don't want to perform this
15628   // transformation in other cases anyway.
15629   SDValue Load;
15630   SDValue Chain;
15631   if (ResultVT.bitsGT(VecEltVT)) {
15632     // If the result type of vextract is wider than the load, then issue an
15633     // extending load instead.
15634     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15635                                                   VecEltVT)
15636                                    ? ISD::ZEXTLOAD
15637                                    : ISD::EXTLOAD;
15638     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15639                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15640                           Align, OriginalLoad->getMemOperand()->getFlags(),
15641                           OriginalLoad->getAAInfo());
15642     Chain = Load.getValue(1);
15643   } else {
15644     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15645                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15646                        OriginalLoad->getAAInfo());
15647     Chain = Load.getValue(1);
15648     if (ResultVT.bitsLT(VecEltVT))
15649       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15650     else
15651       Load = DAG.getBitcast(ResultVT, Load);
15652   }
15653   WorklistRemover DeadNodes(*this);
15654   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15655   SDValue To[] = { Load, Chain };
15656   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15657   // Since we're explicitly calling ReplaceAllUses, add the new node to the
15658   // worklist explicitly as well.
15659   AddToWorklist(Load.getNode());
15660   AddUsersToWorklist(Load.getNode()); // Add users too
15661   // Make sure to revisit this node to clean it up; it will usually be dead.
15662   AddToWorklist(EVE);
15663   ++OpsNarrowed;
15664   return SDValue(EVE, 0);
15665 }
15666
15667 /// Transform a vector binary operation into a scalar binary operation by moving
15668 /// the math/logic after an extract element of a vector.
      ///
      /// \param ExtElt the extract_vector_elt node; operand 0 is the vector and
      /// operand 1 the index.
      /// \param DAG the DAG in which the replacement nodes are created.
      /// \param LegalOperations not read by this function.
      /// \returns the scalar binary operation, or a null SDValue when the index
      /// is not constant, the vector is not a single-use binary operation, the
      /// target declines via shouldScalarizeBinop, or isAnyConstantBuildVector
      /// rejects both operands.
15669 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
15670                                        bool LegalOperations) {
15671   SDValue Vec = ExtElt->getOperand(0);
15672   SDValue Index = ExtElt->getOperand(1);
15673   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15674   if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
15675     return SDValue();
15676
15677   // Targets may want to avoid this to prevent an expensive register transfer.
15678   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15679   if (!TLI.shouldScalarizeBinop(Vec))
15680     return SDValue();
15681
15682   // Extracting an element of a vector constant is constant-folded, so this
15683   // transform is just replacing a vector op with a scalar op while moving the
15684   // extract.
15685   SDValue Op0 = Vec.getOperand(0);
15686   SDValue Op1 = Vec.getOperand(1);
15687   if (isAnyConstantBuildVector(Op0, true) ||
15688       isAnyConstantBuildVector(Op1, true)) {
15689     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
15690     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
15691     SDLoc DL(ExtElt);
15692     EVT VT = ExtElt->getValueType(0);
          // Both operands are extracted at the same index and the same opcode
          // is then rebuilt on the scalars, using the extract's result type.
15693     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
15694     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
15695     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
15696   }
15697
15698   return SDValue();
15699 }
15700
15701 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15702   SDValue VecOp = N->getOperand(0);
15703   SDValue Index = N->getOperand(1);
15704   EVT ScalarVT = N->getValueType(0);
15705   EVT VecVT = VecOp.getValueType();
15706   if (VecOp.isUndef())
15707     return DAG.getUNDEF(ScalarVT);
15708
15709   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15710   //
15711   // This only really matters if the index is non-constant since other combines
15712   // on the constant elements already work.
15713   SDLoc DL(N);
15714   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15715       Index == VecOp.getOperand(2)) {
15716     SDValue Elt = VecOp.getOperand(1);
15717     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
15718   }
15719
15720   // (vextract (scalar_to_vector val, 0) -> val
15721   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15722     // Check if the result type doesn't match the inserted element type. A
15723     // SCALAR_TO_VECTOR may truncate the inserted element and the
15724     // EXTRACT_VECTOR_ELT may widen the extracted vector.
15725     SDValue InOp = VecOp.getOperand(0);
15726     if (InOp.getValueType() != ScalarVT) {
15727       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
15728       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
15729     }
15730     return InOp;
15731   }
15732
15733   // extract_vector_elt of out-of-bounds element -> UNDEF
15734   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15735   unsigned NumElts = VecVT.getVectorNumElements();
15736   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
15737     return DAG.getUNDEF(ScalarVT);
15738
15739   // extract_vector_elt (build_vector x, y), 1 -> y
15740   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
15741       TLI.isTypeLegal(VecVT) &&
15742       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
15743     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
15744     EVT InEltVT = Elt.getValueType();
15745
15746     // Sometimes build_vector's scalar input types do not match result type.
15747     if (ScalarVT == InEltVT)
15748       return Elt;
15749
15750     // TODO: It may be useful to truncate if free if the build_vector implicitly
15751     // converts.
15752   }
15753
15754   // TODO: These transforms should not require the 'hasOneUse' restriction, but
15755   // there are regressions on multiple targets without it. We can end up with a
15756   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
15757   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
15758       VecOp.hasOneUse()) {
15759     // The vector index of the LSBs of the source depend on the endian-ness.
15760     bool IsLE = DAG.getDataLayout().isLittleEndian();
15761     unsigned ExtractIndex = IndexC->getZExtValue();
15762     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
15763     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
15764     SDValue BCSrc = VecOp.getOperand(0);
15765     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
15766       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
15767
15768     if (LegalTypes && BCSrc.getValueType().isInteger() &&
15769         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15770       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
15771       // trunc i64 X to i32
15772       SDValue X = BCSrc.getOperand(0);
15773       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
15774              "Extract element and scalar to vector can't change element type "
15775              "from FP to integer.");
15776       unsigned XBitWidth = X.getValueSizeInBits();
15777       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
15778       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
15779
15780       // An extract element return value type can be wider than its vector
15781       // operand element type. In that case, the high bits are undefined, so
15782       // it's possible that we may need to extend rather than truncate.
15783       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
15784         assert(XBitWidth % VecEltBitWidth == 0 &&
15785                "Scalar bitwidth must be a multiple of vector element bitwidth");
15786         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
15787       }
15788     }
15789   }
15790
15791   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
15792     return BO;
15793
15794   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
15795   // We only perform this optimization before the op legalization phase because
15796   // we may introduce new vector instructions which are not backed by TD
15797   // patterns. For example on AVX, extracting elements from a wide vector
15798   // without using extract_subvector. However, if we can find an underlying
15799   // scalar value, then we can always use that.
15800   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
15801     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
15802     // Find the new index to extract from.
15803     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
15804
15805     // Extracting an undef index is undef.
15806     if (OrigElt == -1)
15807       return DAG.getUNDEF(ScalarVT);
15808
15809     // Select the right vector half to extract from.
15810     SDValue SVInVec;
15811     if (OrigElt < (int)NumElts) {
15812       SVInVec = VecOp.getOperand(0);
15813     } else {
15814       SVInVec = VecOp.getOperand(1);
15815       OrigElt -= NumElts;
15816     }
15817
15818     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
15819       SDValue InOp = SVInVec.getOperand(OrigElt);
15820       if (InOp.getValueType() != ScalarVT) {
15821         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
15822         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
15823       }
15824
15825       return InOp;
15826     }
15827
15828     // FIXME: We should handle recursing on other vector shuffles and
15829     // scalar_to_vector here as well.
15830
15831     if (!LegalOperations ||
15832         // FIXME: Should really be just isOperationLegalOrCustom.
15833         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
15834         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
15835       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15836       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
15837                          DAG.getConstant(OrigElt, DL, IndexTy));
15838     }
15839   }
15840
15841   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
15842   // simplify it based on the (valid) extraction indices.
15843   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
15844         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15845                Use->getOperand(0) == VecOp &&
15846                isa<ConstantSDNode>(Use->getOperand(1));
15847       })) {
15848     APInt DemandedElts = APInt::getNullValue(NumElts);
15849     for (SDNode *Use : VecOp->uses()) {
15850       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
15851       if (CstElt->getAPIntValue().ult(NumElts))
15852         DemandedElts.setBit(CstElt->getZExtValue());
15853     }
15854     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
15855       // We simplified the vector operand of this extract element. If this
15856       // extract is not dead, visit it again so it is folded properly.
15857       if (N->getOpcode() != ISD::DELETED_NODE)
15858         AddToWorklist(N);
15859       return SDValue(N, 0);
15860     }
15861   }
15862
15863   // Everything under here is trying to match an extract of a loaded value.
15864   // If the result of load has to be truncated, then it's not necessarily
15865   // profitable.
15866   bool BCNumEltsChanged = false;
15867   EVT ExtVT = VecVT.getVectorElementType();
15868   EVT LVT = ExtVT;
15869   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
15870     return SDValue();
15871
15872   if (VecOp.getOpcode() == ISD::BITCAST) {
15873     // Don't duplicate a load with other uses.
15874     if (!VecOp.hasOneUse())
15875       return SDValue();
15876
15877     EVT BCVT = VecOp.getOperand(0).getValueType();
15878     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
15879       return SDValue();
15880     if (NumElts != BCVT.getVectorNumElements())
15881       BCNumEltsChanged = true;
15882     VecOp = VecOp.getOperand(0);
15883     ExtVT = BCVT.getVectorElementType();
15884   }
15885
15886   // extract (vector load $addr), i --> load $addr + i * size
15887   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
15888       ISD::isNormalLoad(VecOp.getNode()) &&
15889       !Index->hasPredecessor(VecOp.getNode())) {
15890     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
15891     if (VecLoad && !VecLoad->isVolatile())
15892       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
15893   }
15894
15895   // Perform only after legalization to ensure build_vector / vector_shuffle
15896   // optimizations have already been done.
15897   if (!LegalOperations || !IndexC)
15898     return SDValue();
15899
15900   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
15901   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
15902   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
15903   int Elt = IndexC->getZExtValue();
15904   LoadSDNode *LN0 = nullptr;
15905   if (ISD::isNormalLoad(VecOp.getNode())) {
15906     LN0 = cast<LoadSDNode>(VecOp);
15907   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15908              VecOp.getOperand(0).getValueType() == ExtVT &&
15909              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
15910     // Don't duplicate a load with other uses.
15911     if (!VecOp.hasOneUse())
15912       return SDValue();
15913
15914     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
15915   }
15916   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
15917     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
15918     // =>
15919     // (load $addr+1*size)
15920
15921     // Don't duplicate a load with other uses.
15922     if (!VecOp.hasOneUse())
15923       return SDValue();
15924
15925     // If the bit convert changed the number of elements, it is unsafe
15926     // to examine the mask.
15927     if (BCNumEltsChanged)
15928       return SDValue();
15929
15930     // Select the input vector, guarding against out of range extract vector.
15931     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
15932     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
15933
15934     if (VecOp.getOpcode() == ISD::BITCAST) {
15935       // Don't duplicate a load with other uses.
15936       if (!VecOp.hasOneUse())
15937         return SDValue();
15938
15939       VecOp = VecOp.getOperand(0);
15940     }
15941     if (ISD::isNormalLoad(VecOp.getNode())) {
15942       LN0 = cast<LoadSDNode>(VecOp);
15943       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
15944       Index = DAG.getConstant(Elt, DL, Index.getValueType());
15945     }
15946   }
15947
15948   // Make sure we found a non-volatile load and the extractelement is
15949   // the only use.
15950   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
15951     return SDValue();
15952
15953   // If Idx was -1 above, Elt is going to be -1, so just return undef.
15954   if (Elt == -1)
15955     return DAG.getUNDEF(LVT);
15956
15957   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
15958 }
15959
15960 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15961 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15962   // We perform this optimization post type-legalization because
15963   // the type-legalizer often scalarizes integer-promoted vectors.
15964   // Performing this optimization before may create bit-casts which
15965   // will be type-legalized to complex code sequences.
15966   // We perform this optimization only before the operation legalizer because we
15967   // may introduce illegal operations.
15968   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15969     return SDValue();
15970
15971   unsigned NumInScalars = N->getNumOperands();
15972   SDLoc DL(N);
15973   EVT VT = N->getValueType(0);
15974
15975   // Check to see if this is a BUILD_VECTOR of a bunch of values
15976   // which come from any_extend or zero_extend nodes. If so, we can create
15977   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15978   // optimizations. We do not handle sign-extend because we can't fill the sign
15979   // using shuffles.
15980   EVT SourceType = MVT::Other;
15981   bool AllAnyExt = true;
15982
15983   for (unsigned i = 0; i != NumInScalars; ++i) {
15984     SDValue In = N->getOperand(i);
15985     // Ignore undef inputs.
15986     if (In.isUndef()) continue;
15987
15988     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
15989     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15990
15991     // Abort if the element is not an extension.
15992     if (!ZeroExt && !AnyExt) {
15993       SourceType = MVT::Other;
15994       break;
15995     }
15996
15997     // The input is a ZeroExt or AnyExt. Check the original type.
15998     EVT InTy = In.getOperand(0).getValueType();
15999
16000     // Check that all of the widened source types are the same.
16001     if (SourceType == MVT::Other)
16002       // First time.
16003       SourceType = InTy;
16004     else if (InTy != SourceType) {
16005       // Multiple income types. Abort.
16006       SourceType = MVT::Other;
16007       break;
16008     }
16009
16010     // Check if all of the extends are ANY_EXTENDs.
16011     AllAnyExt &= AnyExt;
16012   }
16013
16014   // In order to have valid types, all of the inputs must be extended from the
16015   // same source type and all of the inputs must be any or zero extend.
16016   // Scalar sizes must be a power of two.
16017   EVT OutScalarTy = VT.getScalarType();
16018   bool ValidTypes = SourceType != MVT::Other &&
16019                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16020                  isPowerOf2_32(SourceType.getSizeInBits());
16021
16022   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16023   // turn into a single shuffle instruction.
16024   if (!ValidTypes)
16025     return SDValue();
16026
16027   bool isLE = DAG.getDataLayout().isLittleEndian();
16028   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16029   assert(ElemRatio > 1 && "Invalid element size ratio");
16030   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16031                                DAG.getConstant(0, DL, SourceType);
16032
16033   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16034   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16035
16036   // Populate the new build_vector
16037   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16038     SDValue Cast = N->getOperand(i);
16039     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16040             Cast.getOpcode() == ISD::ZERO_EXTEND ||
16041             Cast.isUndef()) && "Invalid cast opcode");
16042     SDValue In;
16043     if (Cast.isUndef())
16044       In = DAG.getUNDEF(SourceType);
16045     else
16046       In = Cast->getOperand(0);
16047     unsigned Index = isLE ? (i * ElemRatio) :
16048                             (i * ElemRatio + (ElemRatio - 1));
16049
16050     assert(Index < Ops.size() && "Invalid index");
16051     Ops[Index] = In;
16052   }
16053
16054   // The type of the new BUILD_VECTOR node.
16055   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16056   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16057          "Invalid vector size");
16058   // Check if the new vector type is legal.
16059   if (!isTypeLegal(VecVT) ||
16060       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16061        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
16062     return SDValue();
16063
16064   // Make the new BUILD_VECTOR.
16065   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16066
16067   // The new BUILD_VECTOR node has the potential to be further optimized.
16068   AddToWorklist(BV.getNode());
16069   // Bitcast to the desired type.
16070   return DAG.getBitcast(VT, BV);
16071 }
16072
16073 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
16074                                            ArrayRef<int> VectorMask,
16075                                            SDValue VecIn1, SDValue VecIn2,
16076                                            unsigned LeftIdx) {
16077   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16078   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
16079
16080   EVT VT = N->getValueType(0);
16081   EVT InVT1 = VecIn1.getValueType();
16082   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
16083
16084   unsigned Vec2Offset = 0;
16085   unsigned NumElems = VT.getVectorNumElements();
16086   unsigned ShuffleNumElems = NumElems;
16087
16088   // In case both the input vectors are extracted from same base
16089   // vector we do not need extra addend (Vec2Offset) while
16090   // computing shuffle mask.
16091   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
16092       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
16093       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
16094     Vec2Offset = InVT1.getVectorNumElements();
16095
16096   // We can't generate a shuffle node with mismatched input and output types.
16097   // Try to make the types match the type of the output.
16098   if (InVT1 != VT || InVT2 != VT) {
16099     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
16100       // If the output vector length is a multiple of both input lengths,
16101       // we can concatenate them and pad the rest with undefs.
16102       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
16103       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
16104       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
16105       ConcatOps[0] = VecIn1;
16106       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
16107       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16108       VecIn2 = SDValue();
16109     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
16110       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
16111         return SDValue();
16112
16113       if (!VecIn2.getNode()) {
16114         // If we only have one input vector, and it's twice the size of the
16115         // output, split it in two.
16116         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
16117                              DAG.getConstant(NumElems, DL, IdxTy));
16118         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
16119         // Since we now have shorter input vectors, adjust the offset of the
16120         // second vector's start.
16121         Vec2Offset = NumElems;
16122       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
16123         // VecIn1 is wider than the output, and we have another, possibly
16124         // smaller input. Pad the smaller input with undefs, shuffle at the
16125         // input vector width, and extract the output.
16126         // The shuffle type is different than VT, so check legality again.
16127         if (LegalOperations &&
16128             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
16129           return SDValue();
16130
16131         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
16132         // lower it back into a BUILD_VECTOR. So if the inserted type is
16133         // illegal, don't even try.
16134         if (InVT1 != InVT2) {
16135           if (!TLI.isTypeLegal(InVT2))
16136             return SDValue();
16137           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
16138                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
16139         }
16140         ShuffleNumElems = NumElems * 2;
16141       } else {
16142         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
16143         // than VecIn1. We can't handle this for now - this case will disappear
16144         // when we start sorting the vectors by type.
16145         return SDValue();
16146       }
16147     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
16148                InVT1.getSizeInBits() == VT.getSizeInBits()) {
16149       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
16150       ConcatOps[0] = VecIn2;
16151       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16152     } else {
16153       // TODO: Support cases where the length mismatch isn't exactly by a
16154       // factor of 2.
16155       // TODO: Move this check upwards, so that if we have bad type
16156       // mismatches, we don't create any DAG nodes.
16157       return SDValue();
16158     }
16159   }
16160
16161   // Initialize mask to undef.
16162   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
16163
16164   // Only need to run up to the number of elements actually used, not the
16165   // total number of elements in the shuffle - if we are shuffling a wider
16166   // vector, the high lanes should be set to undef.
16167   for (unsigned i = 0; i != NumElems; ++i) {
16168     if (VectorMask[i] <= 0)
16169       continue;
16170
16171     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
16172     if (VectorMask[i] == (int)LeftIdx) {
16173       Mask[i] = ExtIndex;
16174     } else if (VectorMask[i] == (int)LeftIdx + 1) {
16175       Mask[i] = Vec2Offset + ExtIndex;
16176     }
16177   }
16178
16179   // The type the input vectors may have changed above.
16180   InVT1 = VecIn1.getValueType();
16181
16182   // If we already have a VecIn2, it should have the same type as VecIn1.
16183   // If we don't, get an undef/zero vector of the appropriate type.
16184   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
16185   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
16186
16187   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
16188   if (ShuffleNumElems > NumElems)
16189     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16190
16191   return Shuffle;
16192 }
16193
16194 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16195   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16196
16197   // First, determine where the build vector is not undef.
16198   // TODO: We could extend this to handle zero elements as well as undefs.
16199   int NumBVOps = BV->getNumOperands();
16200   int ZextElt = -1;
16201   for (int i = 0; i != NumBVOps; ++i) {
16202     SDValue Op = BV->getOperand(i);
16203     if (Op.isUndef())
16204       continue;
16205     if (ZextElt == -1)
16206       ZextElt = i;
16207     else
16208       return SDValue();
16209   }
16210   // Bail out if there's no non-undef element.
16211   if (ZextElt == -1)
16212     return SDValue();
16213
16214   // The build vector contains some number of undef elements and exactly
16215   // one other element. That other element must be a zero-extended scalar
16216   // extracted from a vector at a constant index to turn this into a shuffle.
16217   // Also, require that the build vector does not implicitly truncate/extend
16218   // its elements.
16219   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
16220   EVT VT = BV->getValueType(0);
16221   SDValue Zext = BV->getOperand(ZextElt);
16222   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16223       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16224       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16225       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16226     return SDValue();
16227
16228   // The zero-extend must be a multiple of the source size, and we must be
16229   // building a vector of the same size as the source of the extract element.
16230   SDValue Extract = Zext.getOperand(0);
16231   unsigned DestSize = Zext.getValueSizeInBits();
16232   unsigned SrcSize = Extract.getValueSizeInBits();
16233   if (DestSize % SrcSize != 0 ||
16234       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16235     return SDValue();
16236
16237   // Create a shuffle mask that will combine the extracted element with zeros
16238   // and undefs.
16239   int ZextRatio = DestSize / SrcSize;
16240   int NumMaskElts = NumBVOps * ZextRatio;
16241   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16242   for (int i = 0; i != NumMaskElts; ++i) {
16243     if (i / ZextRatio == ZextElt) {
16244       // The low bits of the (potentially translated) extracted element map to
16245       // the source vector. The high bits map to zero. We will use a zero vector
16246       // as the 2nd source operand of the shuffle, so use the 1st element of
16247       // that vector (mask value is number-of-elements) for the high bits.
16248       if (i % ZextRatio == 0)
16249         ShufMask[i] = Extract.getConstantOperandVal(1);
16250       else
16251         ShufMask[i] = NumMaskElts;
16252     }
16253
16254     // Undef elements of the build vector remain undef because we initialize
16255     // the shuffle mask with -1.
16256   }
16257
16258   // Turn this into a shuffle with zero if that's legal.
16259   EVT VecVT = Extract.getOperand(0).getValueType();
16260   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16261     return SDValue();
16262
16263   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16264   // bitcast (shuffle V, ZeroVec, VectorMask)
16265   SDLoc DL(BV);
16266   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16267   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16268                                       ShufMask);
16269   return DAG.getBitcast(VT, Shuf);
16270 }
16271
16272 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16273 // operations. If the types of the vectors we're extracting from allow it,
16274 // turn this into a vector_shuffle node.
16275 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16276   SDLoc DL(N);
16277   EVT VT = N->getValueType(0);
16278
16279   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16280   if (!isTypeLegal(VT))
16281     return SDValue();
16282
16283   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16284     return V;
16285
16286   // May only combine to shuffle after legalize if shuffle is legal.
16287   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16288     return SDValue();
16289
16290   bool UsesZeroVector = false;
16291   unsigned NumElems = N->getNumOperands();
16292
16293   // Record, for each element of the newly built vector, which input vector
16294   // that element comes from. -1 stands for undef, 0 for the zero vector,
16295   // and positive values for the input vectors.
16296   // VectorMask maps each element to its vector number, and VecIn maps vector
16297   // numbers to their initial SDValues.
16298
16299   SmallVector<int, 8> VectorMask(NumElems, -1);
16300   SmallVector<SDValue, 8> VecIn;
16301   VecIn.push_back(SDValue());
16302
16303   for (unsigned i = 0; i != NumElems; ++i) {
16304     SDValue Op = N->getOperand(i);
16305
16306     if (Op.isUndef())
16307       continue;
16308
16309     // See if we can use a blend with a zero vector.
16310     // TODO: Should we generalize this to a blend with an arbitrary constant
16311     // vector?
16312     if (isNullConstant(Op) || isNullFPConstant(Op)) {
16313       UsesZeroVector = true;
16314       VectorMask[i] = 0;
16315       continue;
16316     }
16317
16318     // Not an undef or zero. If the input is something other than an
16319     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16320     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16321         !isa<ConstantSDNode>(Op.getOperand(1)))
16322       return SDValue();
16323     SDValue ExtractedFromVec = Op.getOperand(0);
16324
16325     APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
16326     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16327       return SDValue();
16328
16329     // All inputs must have the same element type as the output.
16330     if (VT.getVectorElementType() !=
16331         ExtractedFromVec.getValueType().getVectorElementType())
16332       return SDValue();
16333
16334     // Have we seen this input vector before?
16335     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16336     // a map back from SDValues to numbers isn't worth it.
16337     unsigned Idx = std::distance(
16338         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16339     if (Idx == VecIn.size())
16340       VecIn.push_back(ExtractedFromVec);
16341
16342     VectorMask[i] = Idx;
16343   }
16344
16345   // If we didn't find at least one input vector, bail out.
16346   if (VecIn.size() < 2)
16347     return SDValue();
16348
16349   // If all the Operands of BUILD_VECTOR extract from same
16350   // vector, then split the vector efficiently based on the maximum
16351   // vector access index and adjust the VectorMask and
16352   // VecIn accordingly.
16353   if (VecIn.size() == 2) {
16354     unsigned MaxIndex = 0;
16355     unsigned NearestPow2 = 0;
16356     SDValue Vec = VecIn.back();
16357     EVT InVT = Vec.getValueType();
16358     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16359     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16360
16361     for (unsigned i = 0; i < NumElems; i++) {
16362       if (VectorMask[i] <= 0)
16363         continue;
16364       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16365       IndexVec[i] = Index;
16366       MaxIndex = std::max(MaxIndex, Index);
16367     }
16368
16369     NearestPow2 = PowerOf2Ceil(MaxIndex);
16370     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16371         NumElems * 2 < NearestPow2) {
16372       unsigned SplitSize = NearestPow2 / 2;
16373       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16374                                      InVT.getVectorElementType(), SplitSize);
16375       if (TLI.isTypeLegal(SplitVT)) {
16376         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16377                                      DAG.getConstant(SplitSize, DL, IdxTy));
16378         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16379                                      DAG.getConstant(0, DL, IdxTy));
16380         VecIn.pop_back();
16381         VecIn.push_back(VecIn1);
16382         VecIn.push_back(VecIn2);
16383
16384         for (unsigned i = 0; i < NumElems; i++) {
16385           if (VectorMask[i] <= 0)
16386             continue;
16387           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16388         }
16389       }
16390     }
16391   }
16392
16393   // TODO: We want to sort the vectors by descending length, so that adjacent
16394   // pairs have similar length, and the longer vector is always first in the
16395   // pair.
16396
16397   // TODO: Should this fire if some of the input vectors has illegal type (like
16398   // it does now), or should we let legalization run its course first?
16399
16400   // Shuffle phase:
16401   // Take pairs of vectors, and shuffle them so that the result has elements
16402   // from these vectors in the correct places.
16403   // For example, given:
16404   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16405   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16406   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16407   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16408   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16409   // We will generate:
16410   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16411   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16412   SmallVector<SDValue, 4> Shuffles;
16413   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16414     unsigned LeftIdx = 2 * In + 1;
16415     SDValue VecLeft = VecIn[LeftIdx];
16416     SDValue VecRight =
16417         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16418
16419     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16420                                                 VecRight, LeftIdx))
16421       Shuffles.push_back(Shuffle);
16422     else
16423       return SDValue();
16424   }
16425
16426   // If we need the zero vector as an "ingredient" in the blend tree, add it
16427   // to the list of shuffles.
16428   if (UsesZeroVector)
16429     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16430                                       : DAG.getConstantFP(0.0, DL, VT));
16431
16432   // If we only have one shuffle, we're done.
16433   if (Shuffles.size() == 1)
16434     return Shuffles[0];
16435
16436   // Update the vector mask to point to the post-shuffle vectors.
16437   for (int &Vec : VectorMask)
16438     if (Vec == 0)
16439       Vec = Shuffles.size() - 1;
16440     else
16441       Vec = (Vec - 1) / 2;
16442
16443   // More than one shuffle. Generate a binary tree of blends, e.g. if from
16444   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16445   // generate:
16446   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16447   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16448   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16449   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16450   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16451   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16452   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16453
16454   // Make sure the initial size of the shuffle list is even.
16455   if (Shuffles.size() % 2)
16456     Shuffles.push_back(DAG.getUNDEF(VT));
16457
16458   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16459     if (CurSize % 2) {
16460       Shuffles[CurSize] = DAG.getUNDEF(VT);
16461       CurSize++;
16462     }
16463     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16464       int Left = 2 * In;
16465       int Right = 2 * In + 1;
16466       SmallVector<int, 8> Mask(NumElems, -1);
16467       for (unsigned i = 0; i != NumElems; ++i) {
16468         if (VectorMask[i] == Left) {
16469           Mask[i] = i;
16470           VectorMask[i] = In;
16471         } else if (VectorMask[i] == Right) {
16472           Mask[i] = i + NumElems;
16473           VectorMask[i] = In;
16474         }
16475       }
16476
16477       Shuffles[In] =
16478           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16479     }
16480   }
16481   return Shuffles[0];
16482 }
16483
16484 // Try to turn a build vector of zero extends of extract vector elts into a
16485 // a vector zero extend and possibly an extract subvector.
16486 // TODO: Support sign extend or any extend?
16487 // TODO: Allow undef elements?
16488 // TODO: Don't require the extracts to start at element 0.
16489 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16490   if (LegalOperations)
16491     return SDValue();
16492
16493   EVT VT = N->getValueType(0);
16494
16495   SDValue Op0 = N->getOperand(0);
16496   auto checkElem = [&](SDValue Op) -> int64_t {
16497     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
16498         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16499         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
16500       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
16501         return C->getZExtValue();
16502     return -1;
16503   };
16504
16505   // Make sure the first element matches
16506   // (zext (extract_vector_elt X, C))
16507   int64_t Offset = checkElem(Op0);
16508   if (Offset < 0)
16509     return SDValue();
16510
16511   unsigned NumElems = N->getNumOperands();
16512   SDValue In = Op0.getOperand(0).getOperand(0);
16513   EVT InSVT = In.getValueType().getScalarType();
16514   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
16515
16516   // Don't create an illegal input type after type legalization.
16517   if (LegalTypes && !TLI.isTypeLegal(InVT))
16518     return SDValue();
16519
16520   // Ensure all the elements come from the same vector and are adjacent.
16521   for (unsigned i = 1; i != NumElems; ++i) {
16522     if ((Offset + i) != checkElem(N->getOperand(i)))
16523       return SDValue();
16524   }
16525
16526   SDLoc DL(N);
16527   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
16528                    Op0.getOperand(0).getOperand(1));
16529   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
16530 }
16531
16532 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
16533   EVT VT = N->getValueType(0);
16534
16535   // A vector built entirely of undefs is undef.
16536   if (ISD::allOperandsUndef(N))
16537     return DAG.getUNDEF(VT);
16538
16539   // If this is a splat of a bitcast from another vector, change to a
16540   // concat_vector.
16541   // For example:
16542   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
16543   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
16544   //
16545   // If X is a build_vector itself, the concat can become a larger build_vector.
16546   // TODO: Maybe this is useful for non-splat too?
16547   if (!LegalOperations) {
16548     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
16549       Splat = peekThroughBitcasts(Splat);
16550       EVT SrcVT = Splat.getValueType();
16551       if (SrcVT.isVector()) {
16552         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
16553         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
16554                                      SrcVT.getVectorElementType(), NumElts);
16555         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
16556           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
16557           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
16558                                        NewVT, Ops);
16559           return DAG.getBitcast(VT, Concat);
16560         }
16561       }
16562     }
16563   }
16564
16565   // Check if we can express BUILD VECTOR via subvector extract.
16566   if (!LegalTypes && (N->getNumOperands() > 1)) {
16567     SDValue Op0 = N->getOperand(0);
16568     auto checkElem = [&](SDValue Op) -> uint64_t {
16569       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
16570           (Op0.getOperand(0) == Op.getOperand(0)))
16571         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
16572           return CNode->getZExtValue();
16573       return -1;
16574     };
16575
16576     int Offset = checkElem(Op0);
16577     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
16578       if (Offset + i != checkElem(N->getOperand(i))) {
16579         Offset = -1;
16580         break;
16581       }
16582     }
16583
16584     if ((Offset == 0) &&
16585         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
16586       return Op0.getOperand(0);
16587     if ((Offset != -1) &&
16588         ((Offset % N->getValueType(0).getVectorNumElements()) ==
16589          0)) // IDX must be multiple of output size.
16590       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
16591                          Op0.getOperand(0), Op0.getOperand(1));
16592   }
16593
16594   if (SDValue V = convertBuildVecZextToZext(N))
16595     return V;
16596
16597   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
16598     return V;
16599
16600   if (SDValue V = reduceBuildVecToShuffle(N))
16601     return V;
16602
16603   return SDValue();
16604 }
16605
16606 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16607   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16608   EVT OpVT = N->getOperand(0).getValueType();
16609
16610   // If the operands are legal vectors, leave them alone.
16611   if (TLI.isTypeLegal(OpVT))
16612     return SDValue();
16613
16614   SDLoc DL(N);
16615   EVT VT = N->getValueType(0);
16616   SmallVector<SDValue, 8> Ops;
16617
16618   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16619   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16620
16621   // Keep track of what we encounter.
16622   bool AnyInteger = false;
16623   bool AnyFP = false;
16624   for (const SDValue &Op : N->ops()) {
16625     if (ISD::BITCAST == Op.getOpcode() &&
16626         !Op.getOperand(0).getValueType().isVector())
16627       Ops.push_back(Op.getOperand(0));
16628     else if (ISD::UNDEF == Op.getOpcode())
16629       Ops.push_back(ScalarUndef);
16630     else
16631       return SDValue();
16632
16633     // Note whether we encounter an integer or floating point scalar.
16634     // If it's neither, bail out, it could be something weird like x86mmx.
16635     EVT LastOpVT = Ops.back().getValueType();
16636     if (LastOpVT.isFloatingPoint())
16637       AnyFP = true;
16638     else if (LastOpVT.isInteger())
16639       AnyInteger = true;
16640     else
16641       return SDValue();
16642   }
16643
16644   // If any of the operands is a floating point scalar bitcast to a vector,
16645   // use floating point types throughout, and bitcast everything.
16646   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16647   if (AnyFP) {
16648     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16649     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16650     if (AnyInteger) {
16651       for (SDValue &Op : Ops) {
16652         if (Op.getValueType() == SVT)
16653           continue;
16654         if (Op.isUndef())
16655           Op = ScalarUndef;
16656         else
16657           Op = DAG.getBitcast(SVT, Op);
16658       }
16659     }
16660   }
16661
16662   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16663                                VT.getSizeInBits() / SVT.getSizeInBits());
16664   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16665 }
16666
16667 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16668 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16669 // most two distinct vectors the same size as the result, attempt to turn this
16670 // into a legal shuffle.
16671 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16672   EVT VT = N->getValueType(0);
16673   EVT OpVT = N->getOperand(0).getValueType();
16674   int NumElts = VT.getVectorNumElements();
16675   int NumOpElts = OpVT.getVectorNumElements();
16676
16677   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16678   SmallVector<int, 8> Mask;
16679
16680   for (SDValue Op : N->ops()) {
16681     Op = peekThroughBitcasts(Op);
16682
16683     // UNDEF nodes convert to UNDEF shuffle mask values.
16684     if (Op.isUndef()) {
16685       Mask.append((unsigned)NumOpElts, -1);
16686       continue;
16687     }
16688
16689     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16690       return SDValue();
16691
16692     // What vector are we extracting the subvector from and at what index?
16693     SDValue ExtVec = Op.getOperand(0);
16694
16695     // We want the EVT of the original extraction to correctly scale the
16696     // extraction index.
16697     EVT ExtVT = ExtVec.getValueType();
16698     ExtVec = peekThroughBitcasts(ExtVec);
16699
16700     // UNDEF nodes convert to UNDEF shuffle mask values.
16701     if (ExtVec.isUndef()) {
16702       Mask.append((unsigned)NumOpElts, -1);
16703       continue;
16704     }
16705
16706     if (!isa<ConstantSDNode>(Op.getOperand(1)))
16707       return SDValue();
16708     int ExtIdx = Op.getConstantOperandVal(1);
16709
16710     // Ensure that we are extracting a subvector from a vector the same
16711     // size as the result.
16712     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16713       return SDValue();
16714
16715     // Scale the subvector index to account for any bitcast.
16716     int NumExtElts = ExtVT.getVectorNumElements();
16717     if (0 == (NumExtElts % NumElts))
16718       ExtIdx /= (NumExtElts / NumElts);
16719     else if (0 == (NumElts % NumExtElts))
16720       ExtIdx *= (NumElts / NumExtElts);
16721     else
16722       return SDValue();
16723
16724     // At most we can reference 2 inputs in the final shuffle.
16725     if (SV0.isUndef() || SV0 == ExtVec) {
16726       SV0 = ExtVec;
16727       for (int i = 0; i != NumOpElts; ++i)
16728         Mask.push_back(i + ExtIdx);
16729     } else if (SV1.isUndef() || SV1 == ExtVec) {
16730       SV1 = ExtVec;
16731       for (int i = 0; i != NumOpElts; ++i)
16732         Mask.push_back(i + ExtIdx + NumElts);
16733     } else {
16734       return SDValue();
16735     }
16736   }
16737
16738   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16739     return SDValue();
16740
16741   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16742                               DAG.getBitcast(VT, SV1), Mask);
16743 }
16744
16745 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16746   // If we only have one input vector, we don't need to do any concatenation.
16747   if (N->getNumOperands() == 1)
16748     return N->getOperand(0);
16749
16750   // Check if all of the operands are undefs.
16751   EVT VT = N->getValueType(0);
16752   if (ISD::allOperandsUndef(N))
16753     return DAG.getUNDEF(VT);
16754
16755   // Optimize concat_vectors where all but the first of the vectors are undef.
16756   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16757         return Op.isUndef();
16758       })) {
16759     SDValue In = N->getOperand(0);
16760     assert(In.getValueType().isVector() && "Must concat vectors");
16761
16762     SDValue Scalar = peekThroughOneUseBitcasts(In);
16763
16764     // concat_vectors(scalar_to_vector(scalar), undef) ->
16765     //     scalar_to_vector(scalar)
16766     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16767          Scalar.hasOneUse()) {
16768       EVT SVT = Scalar.getValueType().getVectorElementType();
16769       if (SVT == Scalar.getOperand(0).getValueType())
16770         Scalar = Scalar.getOperand(0);
16771     }
16772
16773     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
16774     if (!Scalar.getValueType().isVector()) {
16775       // If the bitcast type isn't legal, it might be a trunc of a legal type;
16776       // look through the trunc so we can still do the transform:
16777       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16778       if (Scalar->getOpcode() == ISD::TRUNCATE &&
16779           !TLI.isTypeLegal(Scalar.getValueType()) &&
16780           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16781         Scalar = Scalar->getOperand(0);
16782
16783       EVT SclTy = Scalar.getValueType();
16784
16785       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
16786         return SDValue();
16787
16788       // Bail out if the vector size is not a multiple of the scalar size.
16789       if (VT.getSizeInBits() % SclTy.getSizeInBits())
16790         return SDValue();
16791
16792       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
16793       if (VNTNumElms < 2)
16794         return SDValue();
16795
16796       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
16797       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
16798         return SDValue();
16799
16800       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
16801       return DAG.getBitcast(VT, Res);
16802     }
16803   }
16804
16805   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
16806   // We have already tested above for an UNDEF only concatenation.
16807   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
16808   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
16809   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
16810     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
16811   };
16812   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
16813     SmallVector<SDValue, 8> Opnds;
16814     EVT SVT = VT.getScalarType();
16815
16816     EVT MinVT = SVT;
16817     if (!SVT.isFloatingPoint()) {
16818       // If BUILD_VECTOR are from built from integer, they may have different
16819       // operand types. Get the smallest type and truncate all operands to it.
16820       bool FoundMinVT = false;
16821       for (const SDValue &Op : N->ops())
16822         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16823           EVT OpSVT = Op.getOperand(0).getValueType();
16824           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
16825           FoundMinVT = true;
16826         }
16827       assert(FoundMinVT && "Concat vector type mismatch");
16828     }
16829
16830     for (const SDValue &Op : N->ops()) {
16831       EVT OpVT = Op.getValueType();
16832       unsigned NumElts = OpVT.getVectorNumElements();
16833
16834       if (ISD::UNDEF == Op.getOpcode())
16835         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
16836
16837       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16838         if (SVT.isFloatingPoint()) {
16839           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
16840           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
16841         } else {
16842           for (unsigned i = 0; i != NumElts; ++i)
16843             Opnds.push_back(
16844                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
16845         }
16846       }
16847     }
16848
16849     assert(VT.getVectorNumElements() == Opnds.size() &&
16850            "Concat vector type mismatch");
16851     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
16852   }
16853
16854   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
16855   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
16856     return V;
16857
16858   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
16859   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16860     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
16861       return V;
16862
16863   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
16864   // nodes often generate nop CONCAT_VECTOR nodes.
16865   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
16866   // place the incoming vectors at the exact same location.
16867   SDValue SingleSource = SDValue();
16868   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
16869
16870   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16871     SDValue Op = N->getOperand(i);
16872
16873     if (Op.isUndef())
16874       continue;
16875
16876     // Check if this is the identity extract:
16877     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16878       return SDValue();
16879
16880     // Find the single incoming vector for the extract_subvector.
16881     if (SingleSource.getNode()) {
16882       if (Op.getOperand(0) != SingleSource)
16883         return SDValue();
16884     } else {
16885       SingleSource = Op.getOperand(0);
16886
16887       // Check the source type is the same as the type of the result.
16888       // If not, this concat may extend the vector, so we can not
16889       // optimize it away.
16890       if (SingleSource.getValueType() != N->getValueType(0))
16891         return SDValue();
16892     }
16893
16894     unsigned IdentityIndex = i * PartNumElem;
16895     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16896     // The extract index must be constant.
16897     if (!CS)
16898       return SDValue();
16899
16900     // Check that we are reading from the identity index.
16901     if (CS->getZExtValue() != IdentityIndex)
16902       return SDValue();
16903   }
16904
16905   if (SingleSource.getNode())
16906     return SingleSource;
16907
16908   return SDValue();
16909 }
16910
16911 /// If we are extracting a subvector produced by a wide binary operator try
16912 /// to use a narrow binary operator and/or avoid concatenation and extraction.
16913 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
16914   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
16915   // some of these bailouts with other transforms.
16916
16917   // The extract index must be a constant, so we can map it to a concat operand.
16918   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16919   if (!ExtractIndexC)
16920     return SDValue();
16921
16922   // We are looking for an optionally bitcasted wide vector binary operator
16923   // feeding an extract subvector.
16924   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
16925   if (!ISD::isBinaryOp(BinOp.getNode()))
16926     return SDValue();
16927
16928   // The binop must be a vector type, so we can extract some fraction of it.
16929   EVT WideBVT = BinOp.getValueType();
16930   if (!WideBVT.isVector())
16931     return SDValue();
16932
16933   EVT VT = Extract->getValueType(0);
16934   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
16935   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
16936          "Extract index is not a multiple of the vector length.");
16937
16938   // Bail out if this is not a proper multiple width extraction.
16939   unsigned WideWidth = WideBVT.getSizeInBits();
16940   unsigned NarrowWidth = VT.getSizeInBits();
16941   if (WideWidth % NarrowWidth != 0)
16942     return SDValue();
16943
16944   // Bail out if we are extracting a fraction of a single operation. This can
16945   // occur because we potentially looked through a bitcast of the binop.
16946   unsigned NarrowingRatio = WideWidth / NarrowWidth;
16947   unsigned WideNumElts = WideBVT.getVectorNumElements();
16948   if (WideNumElts % NarrowingRatio != 0)
16949     return SDValue();
16950
16951   // Bail out if the target does not support a narrower version of the binop.
16952   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
16953                                    WideNumElts / NarrowingRatio);
16954   unsigned BOpcode = BinOp.getOpcode();
16955   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16956   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
16957     return SDValue();
16958
16959   // If extraction is cheap, we don't need to look at the binop operands
16960   // for concat ops. The narrow binop alone makes this transform profitable.
16961   // We can't just reuse the original extract index operand because we may have
16962   // bitcasted.
16963   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
16964   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
16965   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
16966   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
16967       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
16968     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
16969     SDLoc DL(Extract);
16970     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
16971     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16972                             BinOp.getOperand(0), NewExtIndex);
16973     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16974                             BinOp.getOperand(1), NewExtIndex);
16975     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
16976                                       BinOp.getNode()->getFlags());
16977     return DAG.getBitcast(VT, NarrowBinOp);
16978   }
16979
16980   // Only handle the case where we are doubling and then halving. A larger ratio
16981   // may require more than two narrow binops to replace the wide binop.
16982   if (NarrowingRatio != 2)
16983     return SDValue();
16984
16985   // TODO: The motivating case for this transform is an x86 AVX1 target. That
16986   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
16987   // flavors, but no other 256-bit integer support. This could be extended to
16988   // handle any binop, but that may require fixing/adding other folds to avoid
16989   // codegen regressions.
16990   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
16991     return SDValue();
16992
16993   // We need at least one concatenation operation of a binop operand to make
16994   // this transform worthwhile. The concat must double the input vector sizes.
16995   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
16996   SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
16997   SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
16998   bool ConcatL =
16999       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17000   bool ConcatR =
17001       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17002   if (!ConcatL && !ConcatR)
17003     return SDValue();
17004
17005   // If one of the binop operands was not the result of a concat, we must
17006   // extract a half-sized operand for our new narrow binop.
17007   SDLoc DL(Extract);
17008
17009   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17010   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
17011   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
17012   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17013                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17014                                     BinOp.getOperand(0),
17015                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17016
17017   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17018                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17019                                     BinOp.getOperand(1),
17020                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17021
17022   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17023   return DAG.getBitcast(VT, NarrowBinOp);
17024 }
17025
17026 /// If we are extracting a subvector from a wide vector load, convert to a
17027 /// narrow load to eliminate the extraction:
17028 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17029 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17030   // TODO: Add support for big-endian. The offset calculation must be adjusted.
17031   if (DAG.getDataLayout().isBigEndian())
17032     return SDValue();
17033
17034   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17035   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17036   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17037     return SDValue();
17038
17039   // Allow targets to opt-out.
17040   EVT VT = Extract->getValueType(0);
17041   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17042   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17043     return SDValue();
17044
17045   // The narrow load will be offset from the base address of the old load if
17046   // we are extracting from something besides index 0 (little-endian).
17047   SDLoc DL(Extract);
17048   SDValue BaseAddr = Ld->getOperand(1);
17049   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
17050
17051   // TODO: Use "BaseIndexOffset" to make this more effective.
17052   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17053   MachineFunction &MF = DAG.getMachineFunction();
17054   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17055                                                    VT.getStoreSize());
17056   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17057   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17058   return NewLd;
17059 }
17060
17061 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
17062   EVT NVT = N->getValueType(0);
17063   SDValue V = N->getOperand(0);
17064
17065   // Extract from UNDEF is UNDEF.
17066   if (V.isUndef())
17067     return DAG.getUNDEF(NVT);
17068
17069   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17070     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17071       return NarrowLoad;
17072
17073   // Combine:
17074   //    (extract_subvec (concat V1, V2, ...), i)
17075   // Into:
17076   //    Vi if possible
17077   // Only operand 0 is checked as 'concat' assumes all inputs of the same
17078   // type.
17079   if (V.getOpcode() == ISD::CONCAT_VECTORS &&
17080       isa<ConstantSDNode>(N->getOperand(1)) &&
17081       V.getOperand(0).getValueType() == NVT) {
17082     unsigned Idx = N->getConstantOperandVal(1);
17083     unsigned NumElems = NVT.getVectorNumElements();
17084     assert((Idx % NumElems) == 0 &&
17085            "IDX in concat is not a multiple of the result vector length.");
17086     return V->getOperand(Idx / NumElems);
17087   }
17088
17089   V = peekThroughBitcasts(V);
17090
17091   // If the input is a build vector. Try to make a smaller build vector.
17092   if (V.getOpcode() == ISD::BUILD_VECTOR) {
17093     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17094       EVT InVT = V.getValueType();
17095       unsigned ExtractSize = NVT.getSizeInBits();
17096       unsigned EltSize = InVT.getScalarSizeInBits();
17097       // Only do this if we won't split any elements.
17098       if (ExtractSize % EltSize == 0) {
17099         unsigned NumElems = ExtractSize / EltSize;
17100         EVT EltVT = InVT.getVectorElementType();
17101         EVT ExtractVT = NumElems == 1 ? EltVT :
17102           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
17103         if ((Level < AfterLegalizeDAG ||
17104              (NumElems == 1 ||
17105               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17106             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17107           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
17108                             EltSize;
17109           if (NumElems == 1) {
17110             SDValue Src = V->getOperand(IdxVal);
17111             if (EltVT != Src.getValueType())
17112               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
17113
17114             return DAG.getBitcast(NVT, Src);
17115           }
17116
17117           // Extract the pieces from the original build_vector.
17118           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
17119                                             makeArrayRef(V->op_begin() + IdxVal,
17120                                                          NumElems));
17121           return DAG.getBitcast(NVT, BuildVec);
17122         }
17123       }
17124     }
17125   }
17126
17127   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
17128     // Handle only simple case where vector being inserted and vector
17129     // being extracted are of same size.
17130     EVT SmallVT = V.getOperand(1).getValueType();
17131     if (!NVT.bitsEq(SmallVT))
17132       return SDValue();
17133
17134     // Only handle cases where both indexes are constants.
17135     auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
17136     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17137
17138     if (InsIdx && ExtIdx) {
17139       // Combine:
17140       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17141       // Into:
17142       //    indices are equal or bit offsets are equal => V1
17143       //    otherwise => (extract_subvec V1, ExtIdx)
17144       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17145           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17146         return DAG.getBitcast(NVT, V.getOperand(1));
17147       return DAG.getNode(
17148           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17149           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17150                          N->getOperand(1));
17151     }
17152   }
17153
17154   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17155     return NarrowBOp;
17156
17157   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17158     return SDValue(N, 0);
17159
17160   return SDValue();
17161 }
17162
17163 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
17164 // or turn a shuffle of a single concat into simpler shuffle then concat.
17165 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17166   EVT VT = N->getValueType(0);
17167   unsigned NumElts = VT.getVectorNumElements();
17168
17169   SDValue N0 = N->getOperand(0);
17170   SDValue N1 = N->getOperand(1);
17171   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17172
17173   SmallVector<SDValue, 4> Ops;
17174   EVT ConcatVT = N0.getOperand(0).getValueType();
17175   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17176   unsigned NumConcats = NumElts / NumElemsPerConcat;
17177
17178   // Special case: shuffle(concat(A,B)) can be more efficiently represented
17179   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17180   // half vector elements.
17181   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17182       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
17183                   SVN->getMask().end(), [](int i) { return i == -1; })) {
17184     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
17185                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
17186     N1 = DAG.getUNDEF(ConcatVT);
17187     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17188   }
17189
17190   // Look at every vector that's inserted. We're looking for exact
17191   // subvector-sized copies from a concatenated vector
17192   for (unsigned I = 0; I != NumConcats; ++I) {
17193     // Make sure we're dealing with a copy.
17194     unsigned Begin = I * NumElemsPerConcat;
17195     bool AllUndef = true, NoUndef = true;
17196     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
17197       if (SVN->getMaskElt(J) >= 0)
17198         AllUndef = false;
17199       else
17200         NoUndef = false;
17201     }
17202
17203     if (NoUndef) {
17204       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
17205         return SDValue();
17206
17207       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
17208         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
17209           return SDValue();
17210
17211       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
17212       if (FirstElt < N0.getNumOperands())
17213         Ops.push_back(N0.getOperand(FirstElt));
17214       else
17215         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
17216
17217     } else if (AllUndef) {
17218       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
17219     } else { // Mixed with general masks and undefs, can't do optimization.
17220       return SDValue();
17221     }
17222   }
17223
17224   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17225 }
17226
17227 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17228 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17229 //
17230 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17231 // a simplification in some sense, but it isn't appropriate in general: some
17232 // BUILD_VECTORs are substantially cheaper than others. The general case
17233 // of a BUILD_VECTOR requires inserting each element individually (or
17234 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17235 // all constants is a single constant pool load.  A BUILD_VECTOR where each
17236 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
17237 // are undef lowers to a small number of element insertions.
17238 //
17239 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17240 // We don't fold shuffles where one side is a non-zero constant, and we don't
17241 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17242 // non-constant operands. This seems to work out reasonably well in practice.
17243 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17244                                        SelectionDAG &DAG,
17245                                        const TargetLowering &TLI) {
17246   EVT VT = SVN->getValueType(0);
17247   unsigned NumElts = VT.getVectorNumElements();
17248   SDValue N0 = SVN->getOperand(0);
17249   SDValue N1 = SVN->getOperand(1);
17250
17251   if (!N0->hasOneUse())
17252     return SDValue();
17253
17254   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
17255   // discussed above.
17256   if (!N1.isUndef()) {
17257     if (!N1->hasOneUse())
17258       return SDValue();
17259
17260     bool N0AnyConst = isAnyConstantBuildVector(N0);
17261     bool N1AnyConst = isAnyConstantBuildVector(N1);
17262     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17263       return SDValue();
17264     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17265       return SDValue();
17266   }
17267
17268   // If both inputs are splats of the same value then we can safely merge this
17269   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17270   bool IsSplat = false;
17271   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17272   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17273   if (BV0 && BV1)
17274     if (SDValue Splat0 = BV0->getSplatValue())
17275       IsSplat = (Splat0 == BV1->getSplatValue());
17276
17277   SmallVector<SDValue, 8> Ops;
17278   SmallSet<SDValue, 16> DuplicateOps;
17279   for (int M : SVN->getMask()) {
17280     SDValue Op = DAG.getUNDEF(VT.getScalarType());
17281     if (M >= 0) {
17282       int Idx = M < (int)NumElts ? M : M - NumElts;
17283       SDValue &S = (M < (int)NumElts ? N0 : N1);
17284       if (S.getOpcode() == ISD::BUILD_VECTOR) {
17285         Op = S.getOperand(Idx);
17286       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17287         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
17288         Op = S.getOperand(0);
17289       } else {
17290         // Operand can't be combined - bail out.
17291         return SDValue();
17292       }
17293     }
17294
17295     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
17296     // generating a splat; semantically, this is fine, but it's likely to
17297     // generate low-quality code if the target can't reconstruct an appropriate
17298     // shuffle.
17299     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
17300       if (!IsSplat && !DuplicateOps.insert(Op).second)
17301         return SDValue();
17302
17303     Ops.push_back(Op);
17304   }
17305
17306   // BUILD_VECTOR requires all inputs to be of the same type, find the
17307   // maximum type and extend them all.
17308   EVT SVT = VT.getScalarType();
17309   if (SVT.isInteger())
17310     for (SDValue &Op : Ops)
17311       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17312   if (SVT != VT.getScalarType())
17313     for (SDValue &Op : Ops)
17314       Op = TLI.isZExtFree(Op.getValueType(), SVT)
17315                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17316                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17317   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17318 }
17319
17320 // Match shuffles that can be converted to any_vector_extend_in_reg.
17321 // This is often generated during legalization.
17322 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
17323 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17324 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17325                                             SelectionDAG &DAG,
17326                                             const TargetLowering &TLI,
17327                                             bool LegalOperations) {
17328   EVT VT = SVN->getValueType(0);
17329   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17330
17331   // TODO Add support for big-endian when we have a test case.
17332   if (!VT.isInteger() || IsBigEndian)
17333     return SDValue();
17334
17335   unsigned NumElts = VT.getVectorNumElements();
17336   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17337   ArrayRef<int> Mask = SVN->getMask();
17338   SDValue N0 = SVN->getOperand(0);
17339
17340   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17341   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17342     for (unsigned i = 0; i != NumElts; ++i) {
17343       if (Mask[i] < 0)
17344         continue;
17345       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17346         continue;
17347       return false;
17348     }
17349     return true;
17350   };
17351
17352   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
17353   // power-of-2 extensions as they are the most likely.
17354   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17355     // Check for non power of 2 vector sizes
17356     if (NumElts % Scale != 0)
17357       continue;
17358     if (!isAnyExtend(Scale))
17359       continue;
17360
17361     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17362     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17363     // Never create an illegal type. Only create unsupported operations if we
17364     // are pre-legalization.
17365     if (TLI.isTypeLegal(OutVT))
17366       if (!LegalOperations ||
17367           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17368         return DAG.getBitcast(VT,
17369                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17370                                           SDLoc(SVN), OutVT, N0));
17371   }
17372
17373   return SDValue();
17374 }
17375
17376 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17377 // each source element of a large type into the lowest elements of a smaller
17378 // destination type. This is often generated during legalization.
17379 // If the source node itself was a '*_extend_vector_inreg' node then we should
17380 // then be able to remove it.
17381 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17382                                         SelectionDAG &DAG) {
17383   EVT VT = SVN->getValueType(0);
17384   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17385
17386   // TODO Add support for big-endian when we have a test case.
17387   if (!VT.isInteger() || IsBigEndian)
17388     return SDValue();
17389
17390   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17391
17392   unsigned Opcode = N0.getOpcode();
17393   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17394       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17395       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17396     return SDValue();
17397
17398   SDValue N00 = N0.getOperand(0);
17399   ArrayRef<int> Mask = SVN->getMask();
17400   unsigned NumElts = VT.getVectorNumElements();
17401   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17402   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17403   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17404
17405   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17406     return SDValue();
17407   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17408
17409   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
17410   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17411   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
17412   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17413     for (unsigned i = 0; i != NumElts; ++i) {
17414       if (Mask[i] < 0)
17415         continue;
17416       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17417         continue;
17418       return false;
17419     }
17420     return true;
17421   };
17422
17423   // At the moment we just handle the case where we've truncated back to the
17424   // same size as before the extension.
17425   // TODO: handle more extension/truncation cases as cases arise.
17426   if (EltSizeInBits != ExtSrcSizeInBits)
17427     return SDValue();
17428
17429   // We can remove *extend_vector_inreg only if the truncation happens at
17430   // the same scale as the extension.
17431   if (isTruncate(ExtScale))
17432     return DAG.getBitcast(VT, N00);
17433
17434   return SDValue();
17435 }
17436
17437 // Combine shuffles of splat-shuffles of the form:
17438 // shuffle (shuffle V, undef, splat-mask), undef, M
17439 // If splat-mask contains undef elements, we need to be careful about
17440 // introducing undef's in the folded mask which are not the result of composing
17441 // the masks of the shuffles.
17442 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
17443                                      ShuffleVectorSDNode *Splat,
17444                                      SelectionDAG &DAG) {
17445   ArrayRef<int> SplatMask = Splat->getMask();
17446   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
17447
17448   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
17449   // every undef mask element in the splat-shuffle has a corresponding undef
17450   // element in the user-shuffle's mask or if the composition of mask elements
17451   // would result in undef.
17452   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
17453   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
17454   //   In this case it is not legal to simplify to the splat-shuffle because we
17455   //   may be exposing the users of the shuffle an undef element at index 1
17456   //   which was not there before the combine.
17457   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
17458   //   In this case the composition of masks yields SplatMask, so it's ok to
17459   //   simplify to the splat-shuffle.
17460   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
17461   //   In this case the composed mask includes all undef elements of SplatMask
17462   //   and in addition sets element zero to undef. It is safe to simplify to
17463   //   the splat-shuffle.
17464   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
17465                                        ArrayRef<int> SplatMask) {
17466     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
17467       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
17468           SplatMask[UserMask[i]] != -1)
17469         return false;
17470     return true;
17471   };
17472   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
17473     return SDValue(Splat, 0);
17474
17475   // Create a new shuffle with a mask that is composed of the two shuffles'
17476   // masks.
17477   SmallVector<int, 32> NewMask;
17478   for (int Idx : UserMask)
17479     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
17480
17481   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
17482                               Splat->getOperand(0), Splat->getOperand(1),
17483                               NewMask);
17484 }
17485
17486 /// If the shuffle mask is taking exactly one element from the first vector
17487 /// operand and passing through all other elements from the second vector
17488 /// operand, return the index of the mask element that is choosing an element
17489 /// from the first operand. Otherwise, return -1.
17490 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
17491   int MaskSize = Mask.size();
17492   int EltFromOp0 = -1;
17493   // TODO: This does not match if there are undef elements in the shuffle mask.
17494   // Should we ignore undefs in the shuffle mask instead? The trade-off is
17495   // removing an instruction (a shuffle), but losing the knowledge that some
17496   // vector lanes are not needed.
17497   for (int i = 0; i != MaskSize; ++i) {
17498     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
17499       // We're looking for a shuffle of exactly one element from operand 0.
17500       if (EltFromOp0 != -1)
17501         return -1;
17502       EltFromOp0 = i;
17503     } else if (Mask[i] != i + MaskSize) {
17504       // Nothing from operand 1 can change lanes.
17505       return -1;
17506     }
17507   }
17508   return EltFromOp0;
17509 }
17510
17511 /// If a shuffle inserts exactly one element from a source vector operand into
17512 /// another vector operand and we can access the specified element as a scalar,
17513 /// then we can eliminate the shuffle.
17514 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
17515                                       SelectionDAG &DAG) {
17516   // First, check if we are taking one element of a vector and shuffling that
17517   // element into another vector.
17518   ArrayRef<int> Mask = Shuf->getMask();
17519   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
17520   SDValue Op0 = Shuf->getOperand(0);
17521   SDValue Op1 = Shuf->getOperand(1);
17522   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
17523   if (ShufOp0Index == -1) {
17524     // Commute mask and check again.
17525     ShuffleVectorSDNode::commuteMask(CommutedMask);
17526     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
17527     if (ShufOp0Index == -1)
17528       return SDValue();
17529     // Commute operands to match the commuted shuffle mask.
17530     std::swap(Op0, Op1);
17531     Mask = CommutedMask;
17532   }
17533
17534   // The shuffle inserts exactly one element from operand 0 into operand 1.
17535   // Now see if we can access that element as a scalar via a real insert element
17536   // instruction.
17537   // TODO: We can try harder to locate the element as a scalar. Examples: it
17538   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
17539   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
17540          "Shuffle mask value must be from operand 0");
17541   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
17542     return SDValue();
17543
17544   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
17545   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
17546     return SDValue();
17547
17548   // There's an existing insertelement with constant insertion index, so we
17549   // don't need to check the legality/profitability of a replacement operation
17550   // that differs at most in the constant value. The target should be able to
17551   // lower any of those in a similar way. If not, legalization will expand this
17552   // to a scalar-to-vector plus shuffle.
17553   //
17554   // Note that the shuffle may move the scalar from the position that the insert
17555   // element used. Therefore, our new insert element occurs at the shuffle's
17556   // mask index value, not the insert's index value.
17557   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
17558   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
17559                                         Op0.getOperand(2).getValueType());
17560   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
17561                      Op1, Op0.getOperand(1), NewInsIndex);
17562 }
17563
/// Combine a VECTOR_SHUFFLE node \p N.
///
/// The folds below are tried in order and the first one that applies decides
/// the result: operand canonicalizations (undef/duplicate operands), shuffle
/// of an inserted element, shuffle of a splat, splat of a BUILD_VECTOR,
/// demanded-element simplification, conversion to any_extend_vector_inreg,
/// removal of extend/truncate pairs, shuffles of concats, shuffles of scalar
/// sources, merging with a bitcasted inner shuffle, and merging of nested
/// shuffles.
///
/// \returns the replacement value, SDValue(N, 0) when
/// SimplifyDemandedVectorElts reported a change, or an empty SDValue when
/// no combine was performed.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Redirect references to the second operand to the same element of the
      // (identical) first operand.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    // Only build a new node if at least one mask element was rewritten.
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // Try to turn a shuffle that inserts a single element of one operand into
  // the other operand into an INSERT_VECTOR_ELT.
  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Base is the first non-undef operand of the BUILD_VECTOR, if any.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      // AllSame requires every operand (undef ones included) to equal Base.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // Shuffle of CONCAT_VECTORS (with an undef or a same-typed CONCAT_VECTORS
  // second operand): let partitionShuffleOfConcats try to fold it. Only
  // attempted before vector-op legalization has completed.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {
    // Rewrite Mask for a vector whose elements are Scale times narrower: each
    // mask element M expands to the Scale consecutive elements
    // Scale*M .. Scale*M+Scale-1, and undef expands to Scale undefs.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    SDValue BC0 = peekThroughOneUseBitcasts(N0);
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must be whole multiples of the smaller one so that
      // both masks can be rescaled to it.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    // SV0/SV1 are assigned lazily: the first distinct source vector seen
    // becomes SV0, the second becomes SV1, and a third one aborts the fold.
    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    // Any operand slot that was never assigned is filled with UNDEF.
    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
17895
17896 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
17897   SDValue InVal = N->getOperand(0);
17898   EVT VT = N->getValueType(0);
17899
17900   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
17901   // with a VECTOR_SHUFFLE and possible truncate.
17902   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17903     SDValue InVec = InVal->getOperand(0);
17904     SDValue EltNo = InVal->getOperand(1);
17905     auto InVecT = InVec.getValueType();
17906     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
17907       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
17908       int Elt = C0->getZExtValue();
17909       NewMask[0] = Elt;
17910       SDValue Val;
17911       // If we have an implict truncate do truncate here as long as it's legal.
17912       // if it's not legal, this should
17913       if (VT.getScalarType() != InVal.getValueType() &&
17914           InVal.getValueType().isScalarInteger() &&
17915           isTypeLegal(VT.getScalarType())) {
17916         Val =
17917             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
17918         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
17919       }
17920       if (VT.getScalarType() == InVecT.getScalarType() &&
17921           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
17922           TLI.isShuffleMaskLegal(NewMask, VT)) {
17923         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
17924                                    DAG.getUNDEF(InVecT), NewMask);
17925         // If the initial vector is the correct size this shuffle is a
17926         // valid result.
17927         if (VT == InVecT)
17928           return Val;
17929         // If not we must truncate the vector.
17930         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
17931           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17932           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
17933           EVT SubVT =
17934               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
17935                                VT.getVectorNumElements());
17936           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
17937                             ZeroIdx);
17938           return Val;
17939         }
17940       }
17941     }
17942   }
17943
17944   return SDValue();
17945 }
17946
17947 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
17948   EVT VT = N->getValueType(0);
17949   SDValue N0 = N->getOperand(0);
17950   SDValue N1 = N->getOperand(1);
17951   SDValue N2 = N->getOperand(2);
17952
17953   // If inserting an UNDEF, just return the original vector.
17954   if (N1.isUndef())
17955     return N0;
17956
17957   // If this is an insert of an extracted vector into an undef vector, we can
17958   // just use the input to the extract.
17959   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17960       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
17961     return N1.getOperand(0);
17962
17963   // If we are inserting a bitcast value into an undef, with the same
17964   // number of elements, just use the bitcast input of the extract.
17965   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
17966   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
17967   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
17968       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17969       N1.getOperand(0).getOperand(1) == N2 &&
17970       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
17971           VT.getVectorNumElements() &&
17972       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
17973           VT.getSizeInBits()) {
17974     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
17975   }
17976
17977   // If both N1 and N2 are bitcast values on which insert_subvector
17978   // would makes sense, pull the bitcast through.
17979   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
17980   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
17981   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
17982     SDValue CN0 = N0.getOperand(0);
17983     SDValue CN1 = N1.getOperand(0);
17984     EVT CN0VT = CN0.getValueType();
17985     EVT CN1VT = CN1.getValueType();
17986     if (CN0VT.isVector() && CN1VT.isVector() &&
17987         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
17988         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
17989       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
17990                                       CN0.getValueType(), CN0, CN1, N2);
17991       return DAG.getBitcast(VT, NewINSERT);
17992     }
17993   }
17994
17995   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
17996   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
17997   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
17998   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17999       N0.getOperand(1).getValueType() == N1.getValueType() &&
18000       N0.getOperand(2) == N2)
18001     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18002                        N1, N2);
18003
18004   // Eliminate an intermediate insert into an undef vector:
18005   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18006   // insert_subvector undef, X, N2
18007   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18008       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18009     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18010                        N1.getOperand(1), N2);
18011
18012   if (!isa<ConstantSDNode>(N2))
18013     return SDValue();
18014
18015   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18016
18017   // Canonicalize insert_subvector dag nodes.
18018   // Example:
18019   // (insert_subvector (insert_subvector A, Idx0), Idx1)
18020   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
18021   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18022       N1.getValueType() == N0.getOperand(1).getValueType() &&
18023       isa<ConstantSDNode>(N0.getOperand(2))) {
18024     unsigned OtherIdx = N0.getConstantOperandVal(2);
18025     if (InsIdx < OtherIdx) {
18026       // Swap nodes.
18027       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18028                                   N0.getOperand(0), N1, N2);
18029       AddToWorklist(NewOp.getNode());
18030       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18031                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18032     }
18033   }
18034
18035   // If the input vector is a concatenation, and the insert replaces
18036   // one of the pieces, we can optimize into a single concat_vectors.
18037   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18038       N0.getOperand(0).getValueType() == N1.getValueType()) {
18039     unsigned Factor = N1.getValueType().getVectorNumElements();
18040
18041     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18042     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18043
18044     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18045   }
18046
18047   // Simplify source operands based on insertion.
18048   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18049     return SDValue(N, 0);
18050
18051   return SDValue();
18052 }
18053
18054 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18055   SDValue N0 = N->getOperand(0);
18056
18057   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18058   if (N0->getOpcode() == ISD::FP16_TO_FP)
18059     return N0->getOperand(0);
18060
18061   return SDValue();
18062 }
18063
18064 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18065   SDValue N0 = N->getOperand(0);
18066
18067   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18068   if (N0->getOpcode() == ISD::AND) {
18069     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18070     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18071       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18072                          N0.getOperand(0));
18073     }
18074   }
18075
18076   return SDValue();
18077 }
18078
18079 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
18080 /// with the destination vector and a zero vector.
18081 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
18082 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
18083 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18084   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18085
18086   EVT VT = N->getValueType(0);
18087   SDValue LHS = N->getOperand(0);
18088   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18089   SDLoc DL(N);
18090
18091   // Make sure we're not running after operation legalization where it
18092   // may have custom lowered the vector shuffles.
18093   if (LegalOperations)
18094     return SDValue();
18095
18096   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18097     return SDValue();
18098
18099   EVT RVT = RHS.getValueType();
18100   unsigned NumElts = RHS.getNumOperands();
18101
18102   // Attempt to create a valid clear mask, splitting the mask into
18103   // sub elements and checking to see if each is
18104   // all zeros or all ones - suitable for shuffle masking.
18105   auto BuildClearMask = [&](int Split) {
18106     int NumSubElts = NumElts * Split;
18107     int NumSubBits = RVT.getScalarSizeInBits() / Split;
18108
18109     SmallVector<int, 8> Indices;
18110     for (int i = 0; i != NumSubElts; ++i) {
18111       int EltIdx = i / Split;
18112       int SubIdx = i % Split;
18113       SDValue Elt = RHS.getOperand(EltIdx);
18114       if (Elt.isUndef()) {
18115         Indices.push_back(-1);
18116         continue;
18117       }
18118
18119       APInt Bits;
18120       if (isa<ConstantSDNode>(Elt))
18121         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18122       else if (isa<ConstantFPSDNode>(Elt))
18123         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18124       else
18125         return SDValue();
18126
18127       // Extract the sub element from the constant bit mask.
18128       if (DAG.getDataLayout().isBigEndian()) {
18129         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18130       } else {
18131         Bits.lshrInPlace(SubIdx * NumSubBits);
18132       }
18133
18134       if (Split > 1)
18135         Bits = Bits.trunc(NumSubBits);
18136
18137       if (Bits.isAllOnesValue())
18138         Indices.push_back(i);
18139       else if (Bits == 0)
18140         Indices.push_back(i + NumSubElts);
18141       else
18142         return SDValue();
18143     }
18144
18145     // Let's see if the target supports this vector_shuffle.
18146     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18147     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18148     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18149       return SDValue();
18150
18151     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18152     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18153                                                    DAG.getBitcast(ClearVT, LHS),
18154                                                    Zero, Indices));
18155   };
18156
18157   // Determine maximum split level (byte level masking).
18158   int MaxSplit = 1;
18159   if (RVT.getScalarSizeInBits() % 8 == 0)
18160     MaxSplit = RVT.getScalarSizeInBits() / 8;
18161
18162   for (int Split = 1; Split <= MaxSplit; ++Split)
18163     if (RVT.getScalarSizeInBits() % Split == 0)
18164       if (SDValue S = BuildClearMask(Split))
18165         return S;
18166
18167   return SDValue();
18168 }
18169
18170 /// Visit a binary vector operation, like ADD.
18171 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
18172   assert(N->getValueType(0).isVector() &&
18173          "SimplifyVBinOp only works on vectors!");
18174
18175   SDValue LHS = N->getOperand(0);
18176   SDValue RHS = N->getOperand(1);
18177   SDValue Ops[] = {LHS, RHS};
18178
18179   // See if we can constant fold the vector operation.
18180   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
18181           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
18182     return Fold;
18183
18184   // Type legalization might introduce new shuffles in the DAG.
18185   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
18186   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
18187   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
18188       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
18189       LHS.getOperand(1).isUndef() &&
18190       RHS.getOperand(1).isUndef()) {
18191     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
18192     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
18193
18194     if (SVN0->getMask().equals(SVN1->getMask())) {
18195       EVT VT = N->getValueType(0);
18196       SDValue UndefVector = LHS.getOperand(1);
18197       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
18198                                      LHS.getOperand(0), RHS.getOperand(0),
18199                                      N->getFlags());
18200       AddUsersToWorklist(N);
18201       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
18202                                   SVN0->getMask());
18203     }
18204   }
18205
18206   return SDValue();
18207 }
18208
18209 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
18210                                     SDValue N2) {
18211   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
18212
18213   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
18214                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
18215
18216   // If we got a simplified select_cc node back from SimplifySelectCC, then
18217   // break it down into a new SETCC node, and a new SELECT node, and then return
18218   // the SELECT node, since we were called with a SELECT node.
18219   if (SCC.getNode()) {
18220     // Check to see if we got a select_cc back (to turn into setcc/select).
18221     // Otherwise, just return whatever node we got back, like fabs.
18222     if (SCC.getOpcode() == ISD::SELECT_CC) {
18223       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
18224                                   N0.getValueType(),
18225                                   SCC.getOperand(0), SCC.getOperand(1),
18226                                   SCC.getOperand(4));
18227       AddToWorklist(SETCC.getNode());
18228       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
18229                            SCC.getOperand(2), SCC.getOperand(3));
18230     }
18231
18232     return SCC;
18233   }
18234   return SDValue();
18235 }
18236
18237 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
18238 /// being selected between, see if we can simplify the select.  Callers of this
18239 /// should assume that TheSelect is deleted if this returns true.  As such, they
18240 /// should return the appropriate thing (e.g. the node) back to the top-level of
18241 /// the DAG combiner loop to avoid it being looked at.
      ///
      /// Two folds are attempted, in this order:
      ///  1. (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) -> (fsqrt x)
      ///  2. A select between two compatible loads on the same token chain is
      ///     replaced by one load from a select of the two addresses.
      ///
      /// \param TheSelect the SELECT, VSELECT or SELECT_CC node being simplified.
      /// \param LHS the first of the two selected values (NaN in fold 1).
      /// \param RHS the second of the two selected values (fsqrt in fold 1).
      /// \returns true if a fold was applied through CombineTo, false otherwise.
18242 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
18243                                     SDValue RHS) {
18244   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18245   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
18246   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
18247     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
18248       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
18249       SDValue Sqrt = RHS;
18250       ISD::CondCode CC;
18251       SDValue CmpLHS;
18252       const ConstantFPSDNode *Zero = nullptr;
18253
              // Pull the condition code and compare operands out of whichever
              // node shape we were given. If the condition of a SELECT/VSELECT
              // is not a SETCC, Zero stays null and the check below fails
              // before CC is read.
18254       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
18255         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
18256         CmpLHS = TheSelect->getOperand(0);
18257         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
18258       } else {
18259         // SELECT or VSELECT
18260         SDValue Cmp = TheSelect->getOperand(0);
18261         if (Cmp.getOpcode() == ISD::SETCC) {
18262           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
18263           CmpLHS = Cmp.getOperand(0);
18264           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
18265         }
18266       }
18267       if (Zero && Zero->isZero() &&
18268           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
18269           CC == ISD::SETULT || CC == ISD::SETLT)) {
18270         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18271         CombineTo(TheSelect, Sqrt);
18272         return true;
18273       }
18274     }
18275   }
18276   // Cannot simplify select with vector condition
18277   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
18278
18279   // If this is a select from two identical things, try to pull the operation
18280   // through the select.
        // Both values must have the same opcode and a single use each.
18281   if (LHS.getOpcode() != RHS.getOpcode() ||
18282       !LHS.hasOneUse() || !RHS.hasOneUse())
18283     return false;
18284
18285   // If this is a load and the token chain is identical, replace the select
18286   // of two loads with a load through a select of the address to load from.
18287   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
18288   // constants have been dropped into the constant pool.
18289   if (LHS.getOpcode() == ISD::LOAD) {
18290     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
18291     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
18292
18293     // Token chains must be identical.
18294     if (LHS.getOperand(0) != RHS.getOperand(0) ||
18295         // Do not let this transformation reduce the number of volatile loads.
18296         LLD->isVolatile() || RLD->isVolatile() ||
18297         // FIXME: If either is a pre/post inc/dec load,
18298         // we'd need to split out the address adjustment.
18299         LLD->isIndexed() || RLD->isIndexed() ||
18300         // If this is an EXTLOAD, the VT's must match.
18301         LLD->getMemoryVT() != RLD->getMemoryVT() ||
18302         // If this is an EXTLOAD, the kind of extension must match.
18303         (LLD->getExtensionType() != RLD->getExtensionType() &&
18304          // The only exception is if one of the extensions is anyext.
18305          LLD->getExtensionType() != ISD::EXTLOAD &&
18306          RLD->getExtensionType() != ISD::EXTLOAD) ||
18307         // FIXME: this discards src value information.  This is
18308         // over-conservative. It would be beneficial to be able to remember
18309         // both potential memory locations.  Since we are discarding
18310         // src value info, don't do the transformation if the memory
18311         // locations are not in the default address space.
18312         LLD->getPointerInfo().getAddrSpace() != 0 ||
18313         RLD->getPointerInfo().getAddrSpace() != 0 ||
              // The select opcode must be usable on the pointer type, since the
              // new select operates on the two base pointers.
18314         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
18315                                       LLD->getBasePtr().getValueType()))
18316       return false;
18317
18318     // The loads must not depend on one another.
18319     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
18320       return false;
18321
18322     // Check that the select condition doesn't reach either load.  If so,
18323     // folding this will induce a cycle into the DAG.  If not, this is safe to
18324     // xform, so create a select of the addresses.
18325
          // Visited and Worklist are shared by every hasPredecessorHelper call
          // below, so the later searches continue from the state left by the
          // earlier ones.
18326     SmallPtrSet<const SDNode *, 32> Visited;
18327     SmallVector<const SDNode *, 16> Worklist;
18328
18329     // Always fail if LLD and RLD are not independent. TheSelect is a
18330     // predecessor to all Nodes in question so we need not search past it.
18331
18332     Visited.insert(TheSelect);
18333     Worklist.push_back(LLD);
18334     Worklist.push_back(RLD);
18335
18336     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
18337         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
18338       return false;
18339
18340     SDValue Addr;
18341     if (TheSelect->getOpcode() == ISD::SELECT) {
18342       // We cannot do this optimization if any pair of {RLD, LLD} is a
18343       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
18344       // Loads, we only need to check if CondNode is a successor to one of the
18345       // loads. We can further avoid this if there's no use of their chain
18346       // value.
18347       SDNode *CondNode = TheSelect->getOperand(0).getNode();
18348       Worklist.push_back(CondNode);
18349
18350       if ((LLD->hasAnyUseOfValue(1) &&
18351            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18352           (RLD->hasAnyUseOfValue(1) &&
18353            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18354         return false;
18355
            // Same condition as the original select, now choosing between the
            // two base pointers.
18356       Addr = DAG.getSelect(SDLoc(TheSelect),
18357                            LLD->getBasePtr().getValueType(),
18358                            TheSelect->getOperand(0), LLD->getBasePtr(),
18359                            RLD->getBasePtr());
18360     } else {  // Otherwise SELECT_CC
18361       // We cannot do this optimization if any pair of {RLD, LLD} is a
18362       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
18363       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
18364       // one of the loads. We can further avoid this if there's no use of their
18365       // chain value.
18366
18367       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
18368       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
18369       Worklist.push_back(CondLHS);
18370       Worklist.push_back(CondRHS);
18371
18372       if ((LLD->hasAnyUseOfValue(1) &&
18373            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18374           (RLD->hasAnyUseOfValue(1) &&
18375            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18376         return false;
18377
            // Same compare operands and condition code (operand 4) as the
            // original select_cc, now choosing between the two base pointers.
18378       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
18379                          LLD->getBasePtr().getValueType(),
18380                          TheSelect->getOperand(0),
18381                          TheSelect->getOperand(1),
18382                          LLD->getBasePtr(), RLD->getBasePtr(),
18383                          TheSelect->getOperand(4));
18384     }
18385
18386     SDValue Load;
18387     // It is safe to replace the two loads if they have different alignments,
18388     // but the new load must be the minimum (most restrictive) alignment of the
18389     // inputs.
18390     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
          // Start from LLD's memory operand flags and keep the invariant and
          // dereferenceable flags only if RLD has them as well.
18391     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
18392     if (!RLD->isInvariant())
18393       MMOFlags &= ~MachineMemOperand::MOInvariant;
18394     if (!RLD->isDereferenceable())
18395       MMOFlags &= ~MachineMemOperand::MODereferenceable;
18396     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
18397       // FIXME: Discards pointer and AA info.
18398       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
18399                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
18400                          MMOFlags);
18401     } else {
18402       // FIXME: Discards pointer and AA info.
            // When LLD is an any-extending load, RLD's extension kind is used;
            // otherwise LLD's own kind is used.
18403       Load = DAG.getExtLoad(
18404           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
18405                                                   : LLD->getExtensionType(),
18406           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
18407           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
18408     }
18409
18410     // Users of the select now use the result of the load.
18411     CombineTo(TheSelect, Load);
18412
18413     // Users of the old loads now use the new load's chain.  We know the
18414     // old-load value is dead now.
18415     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
18416     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
18417     return true;
18418   }
18419
18420   return false;
18421 }
18422
18423 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
18424 /// bitwise 'and'.
18425 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
18426                                             SDValue N1, SDValue N2, SDValue N3,
18427                                             ISD::CondCode CC) {
18428   // If this is a select where the false operand is zero and the compare is a
18429   // check of the sign bit, see if we can perform the "gzip trick":
18430   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
18431   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
18432   EVT XType = N0.getValueType();
18433   EVT AType = N2.getValueType();
18434   if (!isNullConstant(N3) || !XType.bitsGE(AType))
18435     return SDValue();
18436
18437   // If the comparison is testing for a positive value, we have to invert
18438   // the sign bit mask, so only do that transform if the target has a bitwise
18439   // 'and not' instruction (the invert is free).
18440   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
18441     // (X > -1) ? A : 0
18442     // (X >  0) ? X : 0 <-- This is canonical signed max.
18443     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
18444       return SDValue();
18445   } else if (CC == ISD::SETLT) {
18446     // (X <  0) ? A : 0
18447     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
18448     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
18449       return SDValue();
18450   } else {
18451     return SDValue();
18452   }
18453
18454   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
18455   // constant.
18456   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
18457   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18458   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
18459     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
18460     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
18461     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
18462     AddToWorklist(Shift.getNode());
18463
18464     if (XType.bitsGT(AType)) {
18465       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18466       AddToWorklist(Shift.getNode());
18467     }
18468
18469     if (CC == ISD::SETGT)
18470       Shift = DAG.getNOT(DL, Shift, AType);
18471
18472     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18473   }
18474
18475   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
18476   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
18477   AddToWorklist(Shift.getNode());
18478
18479   if (XType.bitsGT(AType)) {
18480     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18481     AddToWorklist(Shift.getNode());
18482   }
18483
18484   if (CC == ISD::SETGT)
18485     Shift = DAG.getNOT(DL, Shift, AType);
18486
18487   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18488 }
18489
18490 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
18491 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
18492 /// in it. This may be a win when the constant is not otherwise available
18493 /// because it replaces two constant pool loads with one.
18494 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
18495     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
18496     ISD::CondCode CC) {
18497   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
18498     return SDValue();
18499
18500   // If we are before legalize types, we want the other legalization to happen
18501   // first (for example, to avoid messing with soft float).
18502   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
18503   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
18504   EVT VT = N2.getValueType();
18505   if (!TV || !FV || !TLI.isTypeLegal(VT))
18506     return SDValue();
18507
18508   // If a constant can be materialized without loads, this does not make sense.
18509   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
18510       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
18511       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
18512     return SDValue();
18513
18514   // If both constants have multiple uses, then we won't need to do an extra
18515   // load. The values are likely around in registers for other users.
18516   if (!TV->hasOneUse() && !FV->hasOneUse())
18517     return SDValue();
18518
18519   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
18520                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
18521   Type *FPTy = Elts[0]->getType();
18522   const DataLayout &TD = DAG.getDataLayout();
18523
18524   // Create a ConstantArray of the two constants.
18525   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
18526   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
18527                                       TD.getPrefTypeAlignment(FPTy));
18528   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
18529
18530   // Get offsets to the 0 and 1 elements of the array, so we can select between
18531   // them.
18532   SDValue Zero = DAG.getIntPtrConstant(0, DL);
18533   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
18534   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
18535   SDValue Cond =
18536       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
18537   AddToWorklist(Cond.getNode());
18538   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
18539   AddToWorklist(CstOffset.getNode());
18540   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
18541   AddToWorklist(CPIdx.getNode());
18542   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
18543                      MachinePointerInfo::getConstantPool(
18544                          DAG.getMachineFunction()), Alignment);
18545 }
18546
18547 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
18548 /// where 'cond' is the comparison specified by CC.
      ///
      /// The folds are tried in the order they appear below; the first one that
      /// matches determines the result.
      ///
      /// \param DL location used for most of the newly created nodes.
      /// \param N0 left operand of the comparison.
      /// \param N1 right operand of the comparison.
      /// \param N2 value selected when the comparison holds.
      /// \param N3 value selected when the comparison does not hold.
      /// \param CC the condition code of the comparison.
      /// \param NotExtCompare when true, the "select C, 1, 0" case is not turned
      ///        into a zero-extended compare.
      /// \returns the simplified value, or an empty SDValue if no fold applies.
18549 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
18550                                       SDValue N2, SDValue N3, ISD::CondCode CC,
18551                                       bool NotExtCompare) {
18552   // (x ? y : y) -> y.
18553   if (N2 == N3) return N2;
18554
18555   EVT CmpOpVT = N0.getValueType();
18556   EVT VT = N2.getValueType();
18557   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
18558   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18559   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
18560
18561   // Determine if the condition we're dealing with is constant.
18562   SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
18563                               false);
18564   if (SCC.getNode()) AddToWorklist(SCC.getNode());
18565
18566   if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
18567     // fold select_cc true, x, y -> x
18568     // fold select_cc false, x, y -> y
18569     return !SCCC->isNullValue() ? N2 : N3;
18570   }
18571
        // Select between two FP constants -> one indexed constant pool load.
18572   if (SDValue V =
18573           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
18574     return V;
18575
        // Sign-bit test with a zero false value -> shift and 'and'.
18576   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
18577     return V;
18578
18579   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
18580   // where y is has a single bit set.
18581   // A plaintext description would be, we can turn the SELECT_CC into an AND
18582   // when the condition can be materialized as an all-ones register.  Any
18583   // single bit-test can be materialized as an all-ones register with
18584   // shift-left and shift-right-arith.
18585   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
18586       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
18587     SDValue AndLHS = N0->getOperand(0);
18588     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18589     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
18590       // Shift the tested bit over the sign bit.
18591       const APInt &AndMask = ConstAndRHS->getAPIntValue();
18592       SDValue ShlAmt =
18593         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
18594                         getShiftAmountTy(AndLHS.getValueType()));
18595       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
18596
18597       // Now arithmetic right shift it all the way over, so the result is either
18598       // all-ones, or zero.
18599       SDValue ShrAmt =
18600         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
18601                         getShiftAmountTy(Shl.getValueType()));
18602       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
18603
18604       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
18605     }
18606   }
18607
18608   // fold select C, 16, 0 -> shl C, 4
        // Fold: the true value is a power of two and the false value is zero.
        // Swap: the mirrored case, handled below by inverting the condition.
18609   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
18610   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
18611
18612   if ((Fold || Swap) &&
18613       TLI.getBooleanContents(CmpOpVT) ==
18614           TargetLowering::ZeroOrOneBooleanContent &&
18615       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
18616
          // Only the constant pointers N2C/N3C are exchanged here; the SDValues
          // N2 and N3 keep their original meaning in the code that follows.
18617     if (Swap) {
18618       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
18619       std::swap(N2C, N3C);
18620     }
18621
18622     // If the caller doesn't want us to simplify this into a zext of a compare,
18623     // don't do it.
18624     if (NotExtCompare && N2C->isOne())
18625       return SDValue();
18626
          // Note: this SCC shadows the SCC declared at the top of the function.
18627     SDValue Temp, SCC;
18628     // zext (setcc n0, n1)
          // With legal types the target's setcc result type is used, and the
          // value is zero-extended "in register" when VT is narrower than that
          // type; otherwise an i1 setcc is created and zero-extended to VT.
18629     if (LegalTypes) {
18630       SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
18631       if (VT.bitsLT(SCC.getValueType()))
18632         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
18633       else
18634         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18635     } else {
18636       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
18637       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18638     }
18639
18640     AddToWorklist(SCC.getNode());
18641     AddToWorklist(Temp.getNode());
18642
          // A selected value of 1 needs no shift.
18643     if (N2C->isOne())
18644       return Temp;
18645
18646     // shl setcc result by log2 n2c
18647     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
18648                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
18649                                        SDLoc(Temp),
18650                                        getShiftAmountTy(Temp.getValueType())));
18651   }
18652
18653   // Check to see if this is an integer abs.
18654   // select_cc setg[te] X,  0,  X, -X ->
18655   // select_cc setgt    X, -1,  X, -X ->
18656   // select_cc setl[te] X,  0, -X,  X ->
18657   // select_cc setlt    X,  1, -X,  X ->
18658   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
18659   if (N1C) {
          // SubC becomes the left operand of the "C - X" subtraction when one
          // of the patterns above matches; it must be zero for "-X".
18660     ConstantSDNode *SubC = nullptr;
18661     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
18662          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
18663         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
18664       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
18665     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
18666               (N1C->isOne() && CC == ISD::SETLT)) &&
18667              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
18668       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18669
18670     if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
            // Note: this DL shadows the DL parameter and is based on N0.
18671       SDLoc DL(N0);
18672       SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
18673                                   DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
18674                                                   DL,
18675                                                   getShiftAmountTy(CmpOpVT)));
18676       SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
18677       AddToWorklist(Shift.getNode());
18678       AddToWorklist(Add.getNode());
18679       return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
18680     }
18681   }
18682
18683   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18684   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18685   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18686   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18687   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18688   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18689   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18690   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18691   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18692     SDValue ValueOnZero = N2;
18693     SDValue Count = N3;
18694     // If the condition is NE instead of E, swap the operands.
18695     if (CC == ISD::SETNE)
18696       std::swap(ValueOnZero, Count);
18697     // Check if the value on zero is a constant equal to the bits in the type.
18698     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18699       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18700         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18701         // legal, combine to just cttz.
18702         if ((Count.getOpcode() == ISD::CTTZ ||
18703              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18704             N0 == Count.getOperand(0) &&
18705             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18706           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18707         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18708         // legal, combine to just ctlz.
18709         if ((Count.getOpcode() == ISD::CTLZ ||
18710              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18711             N0 == Count.getOperand(0) &&
18712             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18713           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18714       }
18715     }
18716   }
18717
18718   return SDValue();
18719 }
18720
18721 /// This is a stub for TargetLowering::SimplifySetCC.
18722 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18723                                    ISD::CondCode Cond, const SDLoc &DL,
18724                                    bool foldBooleans) {
18725   TargetLowering::DAGCombinerInfo
18726     DagCombineInfo(DAG, Level, false, this);
18727   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18728 }
18729
18730 /// Given an ISD::SDIV node expressing a divide by constant, return
18731 /// a DAG expression to select that will generate the same value by multiplying
18732 /// by a magic number.
18733 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18734 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18735   // when optimising for minimum size, we don't want to expand a div to a mul
18736   // and a shift.
18737   if (DAG.getMachineFunction().getFunction().optForMinSize())
18738     return SDValue();
18739
18740   SmallVector<SDNode *, 8> Built;
18741   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18742     for (SDNode *N : Built)
18743       AddToWorklist(N);
18744     return S;
18745   }
18746
18747   return SDValue();
18748 }
18749
18750 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18751 /// DAG expression that will generate the same value by right shifting.
18752 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18753   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18754   if (!C)
18755     return SDValue();
18756
18757   // Avoid division by zero.
18758   if (C->isNullValue())
18759     return SDValue();
18760
18761   SmallVector<SDNode *, 8> Built;
18762   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
18763     for (SDNode *N : Built)
18764       AddToWorklist(N);
18765     return S;
18766   }
18767
18768   return SDValue();
18769 }
18770
18771 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18772 /// expression that will generate the same value by multiplying by a magic
18773 /// number.
18774 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18775 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18776   // when optimising for minimum size, we don't want to expand a div to a mul
18777   // and a shift.
18778   if (DAG.getMachineFunction().getFunction().optForMinSize())
18779     return SDValue();
18780
18781   SmallVector<SDNode *, 8> Built;
18782   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
18783     for (SDNode *N : Built)
18784       AddToWorklist(N);
18785     return S;
18786   }
18787
18788   return SDValue();
18789 }
18790
18791 /// Determines the LogBase2 value for a non-null input value using the
18792 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
18793 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18794   EVT VT = V.getValueType();
18795   unsigned EltBits = VT.getScalarSizeInBits();
18796   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18797   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18798   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18799   return LogBase2;
18800 }
18801
18802 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18803 /// For the reciprocal, we need to find the zero of the function:
18804 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
18805 ///     =>
18806 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18807 ///     does not require additional intermediate precision]
18808 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18809   if (Level >= AfterLegalizeDAG)
18810     return SDValue();
18811
18812   // TODO: Handle half and/or extended types?
18813   EVT VT = Op.getValueType();
18814   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18815     return SDValue();
18816
18817   // If estimates are explicitly disabled for this function, we're done.
18818   MachineFunction &MF = DAG.getMachineFunction();
18819   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18820   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18821     return SDValue();
18822
18823   // Estimates may be explicitly enabled for this type with a custom number of
18824   // refinement steps.
18825   int Iterations = TLI.getDivRefinementSteps(VT, MF);
18826   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18827     AddToWorklist(Est.getNode());
18828
18829     if (Iterations) {
18830       EVT VT = Op.getValueType();
18831       SDLoc DL(Op);
18832       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18833
18834       // Newton iterations: Est = Est + Est (1 - Arg * Est)
18835       for (int i = 0; i < Iterations; ++i) {
18836         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18837         AddToWorklist(NewEst.getNode());
18838
18839         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18840         AddToWorklist(NewEst.getNode());
18841
18842         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18843         AddToWorklist(NewEst.getNode());
18844
18845         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18846         AddToWorklist(Est.getNode());
18847       }
18848     }
18849     return Est;
18850   }
18851
18852   return SDValue();
18853 }
18854
18855 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18856 /// For the reciprocal sqrt, we need to find the zero of the function:
18857 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18858 ///     =>
18859 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18860 /// As a result, we precompute A/2 prior to the iteration loop.
18861 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18862                                          unsigned Iterations,
18863                                          SDNodeFlags Flags, bool Reciprocal) {
18864   EVT VT = Arg.getValueType();
18865   SDLoc DL(Arg);
18866   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18867
18868   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18869   // this entire sequence requires only one FP constant.
18870   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18871   AddToWorklist(HalfArg.getNode());
18872
18873   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18874   AddToWorklist(HalfArg.getNode());
18875
18876   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18877   for (unsigned i = 0; i < Iterations; ++i) {
18878     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18879     AddToWorklist(NewEst.getNode());
18880
18881     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18882     AddToWorklist(NewEst.getNode());
18883
18884     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18885     AddToWorklist(NewEst.getNode());
18886
18887     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18888     AddToWorklist(Est.getNode());
18889   }
18890
18891   // If non-reciprocal square root is requested, multiply the result by Arg.
18892   if (!Reciprocal) {
18893     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18894     AddToWorklist(Est.getNode());
18895   }
18896
18897   return Est;
18898 }
18899
18900 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18901 /// For the reciprocal sqrt, we need to find the zero of the function:
18902 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18903 ///     =>
18904 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
18905 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
18906                                          unsigned Iterations,
18907                                          SDNodeFlags Flags, bool Reciprocal) {
18908   EVT VT = Arg.getValueType();
18909   SDLoc DL(Arg);
18910   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
18911   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
18912
18913   // This routine must enter the loop below to work correctly
18914   // when (Reciprocal == false).
18915   assert(Iterations > 0);
18916
18917   // Newton iterations for reciprocal square root:
18918   // E = (E * -0.5) * ((A * E) * E + -3.0)
18919   for (unsigned i = 0; i < Iterations; ++i) {
18920     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
18921     AddToWorklist(AE.getNode());
18922
18923     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
18924     AddToWorklist(AEE.getNode());
18925
18926     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
18927     AddToWorklist(RHS.getNode());
18928
18929     // When calculating a square root at the last iteration build:
18930     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
18931     // (notice a common subexpression)
18932     SDValue LHS;
18933     if (Reciprocal || (i + 1) < Iterations) {
18934       // RSQRT: LHS = (E * -0.5)
18935       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
18936     } else {
18937       // SQRT: LHS = (A * E) * -0.5
18938       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
18939     }
18940     AddToWorklist(LHS.getNode());
18941
18942     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
18943     AddToWorklist(Est.getNode());
18944   }
18945
18946   return Est;
18947 }
18948
18949 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
18950 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
18951 /// Op can be zero.
18952 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
18953                                            bool Reciprocal) {
18954   if (Level >= AfterLegalizeDAG)
18955     return SDValue();
18956
18957   // TODO: Handle half and/or extended types?
18958   EVT VT = Op.getValueType();
18959   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18960     return SDValue();
18961
18962   // If estimates are explicitly disabled for this function, we're done.
18963   MachineFunction &MF = DAG.getMachineFunction();
18964   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
18965   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18966     return SDValue();
18967
18968   // Estimates may be explicitly enabled for this type with a custom number of
18969   // refinement steps.
18970   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
18971
18972   bool UseOneConstNR = false;
18973   if (SDValue Est =
18974       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
18975                           Reciprocal)) {
18976     AddToWorklist(Est.getNode());
18977
18978     if (Iterations) {
18979       Est = UseOneConstNR
18980             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
18981             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
18982
18983       if (!Reciprocal) {
18984         // The estimate is now completely wrong if the input was exactly 0.0 or
18985         // possibly a denormal. Force the answer to 0.0 for those cases.
18986         EVT VT = Op.getValueType();
18987         SDLoc DL(Op);
18988         EVT CCVT = getSetCCResultType(VT);
18989         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
18990         const Function &F = DAG.getMachineFunction().getFunction();
18991         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
18992         if (Denorms.getValueAsString().equals("ieee")) {
18993           // fabs(X) < SmallestNormal ? 0.0 : Est
18994           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
18995           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
18996           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
18997           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18998           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
18999           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
19000           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
19001           AddToWorklist(Fabs.getNode());
19002           AddToWorklist(IsDenorm.getNode());
19003           AddToWorklist(Est.getNode());
19004         } else {
19005           // X == 0.0 ? 0.0 : Est
19006           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19007           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
19008           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
19009           AddToWorklist(IsZero.getNode());
19010           AddToWorklist(Est.getNode());
19011         }
19012       }
19013     }
19014     return Est;
19015   }
19016
19017   return SDValue();
19018 }
19019
19020 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19021   return buildSqrtEstimateImpl(Op, Flags, true);
19022 }
19023
19024 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19025   return buildSqrtEstimateImpl(Op, Flags, false);
19026 }
19027
19028 /// Return true if there is any possibility that the two addresses overlap.
19029 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
19030   // If they are the same then they must be aliases.
19031   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
19032
19033   // If they are both volatile then they cannot be reordered.
19034   if (Op0->isVolatile() && Op1->isVolatile()) return true;
19035
19036   // If one operation reads from invariant memory, and the other may store, they
19037   // cannot alias. These should really be checking the equivalent of mayWrite,
19038   // but it only matters for memory nodes other than load /store.
19039   if (Op0->isInvariant() && Op1->writeMem())
19040     return false;
19041
19042   if (Op1->isInvariant() && Op0->writeMem())
19043     return false;
19044
19045   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
19046   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
19047
19048   // Check for BaseIndexOffset matching.
19049   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
19050   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
19051   int64_t PtrDiff;
19052   if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
19053     if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
19054       return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
19055
19056     // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
19057     // able to calculate their relative offset if at least one arises
19058     // from an alloca. However, these allocas cannot overlap and we
19059     // can infer there is no alias.
19060     if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
19061       if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
19062         MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19063         // If the base are the same frame index but the we couldn't find a
19064         // constant offset, (indices are different) be conservative.
19065         if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
19066                        !MFI.isFixedObjectIndex(B->getIndex())))
19067           return false;
19068       }
19069
19070     bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
19071     bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
19072     bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
19073     bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
19074     bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
19075     bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
19076
19077     // If of mismatched base types or checkable indices we can check
19078     // they do not alias.
19079     if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
19080          (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
19081         (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
19082       return false;
19083   }
19084
19085   // If we know required SrcValue1 and SrcValue2 have relatively large
19086   // alignment compared to the size and offset of the access, we may be able
19087   // to prove they do not alias. This check is conservative for now to catch
19088   // cases created by splitting vector types.
19089   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
19090   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
19091   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
19092   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
19093   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
19094       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
19095     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
19096     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
19097
19098     // There is no overlap between these relatively aligned accesses of
19099     // similar size. Return no alias.
19100     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
19101         (OffAlign1 + NumBytes1) <= OffAlign0)
19102       return false;
19103   }
19104
19105   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
19106                    ? CombinerGlobalAA
19107                    : DAG.getSubtarget().useAA();
19108 #ifndef NDEBUG
19109   if (CombinerAAOnlyFunc.getNumOccurrences() &&
19110       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
19111     UseAA = false;
19112 #endif
19113
19114   if (UseAA && AA &&
19115       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
19116     // Use alias analysis information.
19117     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
19118     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
19119     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
19120     AliasResult AAResult =
19121         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
19122                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
19123                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
19124                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
19125     if (AAResult == NoAlias)
19126       return false;
19127   }
19128
19129   // Otherwise we have to assume they alias.
19130   return true;
19131 }
19132
19133 /// Walk up chain skipping non-aliasing memory nodes,
19134 /// looking for aliasing nodes and adding them to the Aliases vector.
19135 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
19136                                    SmallVectorImpl<SDValue> &Aliases) {
19137   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
19138   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
19139
19140   // Get alias information for node.
19141   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
19142
19143   // Starting off.
19144   Chains.push_back(OriginalChain);
19145   unsigned Depth = 0;
19146
19147   // Look at each chain and determine if it is an alias.  If so, add it to the
19148   // aliases list.  If not, then continue up the chain looking for the next
19149   // candidate.
19150   while (!Chains.empty()) {
19151     SDValue Chain = Chains.pop_back_val();
19152
19153     // For TokenFactor nodes, look at each operand and only continue up the
19154     // chain until we reach the depth limit.
19155     //
19156     // FIXME: The depth check could be made to return the last non-aliasing
19157     // chain we found before we hit a tokenfactor rather than the original
19158     // chain.
19159     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
19160       Aliases.clear();
19161       Aliases.push_back(OriginalChain);
19162       return;
19163     }
19164
19165     // Don't bother if we've been before.
19166     if (!Visited.insert(Chain.getNode()).second)
19167       continue;
19168
19169     switch (Chain.getOpcode()) {
19170     case ISD::EntryToken:
19171       // Entry token is ideal chain operand, but handled in FindBetterChain.
19172       break;
19173
19174     case ISD::LOAD:
19175     case ISD::STORE: {
19176       // Get alias information for Chain.
19177       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
19178           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
19179
19180       // If chain is alias then stop here.
19181       if (!(IsLoad && IsOpLoad) &&
19182           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
19183         Aliases.push_back(Chain);
19184       } else {
19185         // Look further up the chain.
19186         Chains.push_back(Chain.getOperand(0));
19187         ++Depth;
19188       }
19189       break;
19190     }
19191
19192     case ISD::TokenFactor:
19193       // We have to check each of the operands of the token factor for "small"
19194       // token factors, so we queue them up.  Adding the operands to the queue
19195       // (stack) in reverse order maintains the original order and increases the
19196       // likelihood that getNode will find a matching token factor (CSE.)
19197       if (Chain.getNumOperands() > 16) {
19198         Aliases.push_back(Chain);
19199         break;
19200       }
19201       for (unsigned n = Chain.getNumOperands(); n;)
19202         Chains.push_back(Chain.getOperand(--n));
19203       ++Depth;
19204       break;
19205
19206     case ISD::CopyFromReg:
19207       // Forward past CopyFromReg.
19208       Chains.push_back(Chain.getOperand(0));
19209       ++Depth;
19210       break;
19211
19212     default:
19213       // For all other instructions we will just have to take what we can get.
19214       Aliases.push_back(Chain);
19215       break;
19216     }
19217   }
19218 }
19219
19220 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
19221 /// (aliasing node.)
19222 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
19223   if (OptLevel == CodeGenOpt::None)
19224     return OldChain;
19225
19226   // Ops for replacing token factor.
19227   SmallVector<SDValue, 8> Aliases;
19228
19229   // Accumulate all the aliases to this node.
19230   GatherAllAliases(N, OldChain, Aliases);
19231
19232   // If no operands then chain to entry token.
19233   if (Aliases.size() == 0)
19234     return DAG.getEntryNode();
19235
19236   // If a single operand then chain to it.  We don't need to revisit it.
19237   if (Aliases.size() == 1)
19238     return Aliases[0];
19239
19240   // Construct a custom tailored token factor.
19241   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
19242 }
19243
namespace {

// Empty value type: every UnitT compares equal to every other. It serves as
// the mapped value where only the keys of a map carry information.
//
// These are file-local helpers, so they live in an anonymous namespace: the
// type, the Unit object and the comparison operators get internal linkage
// instead of adding externally visible symbols to the global namespace.
//
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // end anonymous namespace
19248
19249 // This function tries to collect a bunch of potentially interesting
19250 // nodes to improve the chains of, all at once. This might seem
19251 // redundant, as this function gets called when visiting every store
19252 // node, so why not let the work be done on each store as it's visited?
19253 //
19254 // I believe this is mainly important because MergeConsecutiveStores
19255 // is unable to deal with merging stores of different sizes, so unless
19256 // we improve the chains of all the potential candidates up-front
19257 // before running MergeConsecutiveStores, it might only see some of
19258 // the nodes that will eventually be candidates, and then not be able
19259 // to go from a partially-merged state to the desired final
19260 // fully-merged state.
19261
19262 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
19263   SmallVector<StoreSDNode *, 8> ChainedStores;
19264   StoreSDNode *STChain = St;
19265   // Intervals records which offsets from BaseIndex have been covered. In
19266   // the common case, every store writes to the immediately previous address
19267   // space and thus merged with the previous interval at insertion time.
19268
19269   using IMap =
19270       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
19271   IMap::Allocator A;
19272   IMap Intervals(A);
19273
19274   // This holds the base pointer, index, and the offset in bytes from the base
19275   // pointer.
19276   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19277
19278   // We must have a base and an offset.
19279   if (!BasePtr.getBase().getNode())
19280     return false;
19281
19282   // Do not handle stores to undef base pointers.
19283   if (BasePtr.getBase().isUndef())
19284     return false;
19285
19286   // Add ST's interval.
19287   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
19288
19289   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
19290     // If the chain has more than one use, then we can't reorder the mem ops.
19291     if (!SDValue(Chain, 0)->hasOneUse())
19292       break;
19293     if (Chain->isVolatile() || Chain->isIndexed())
19294       break;
19295
19296     // Find the base pointer and offset for this memory node.
19297     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
19298     // Check that the base pointer is the same as the original one.
19299     int64_t Offset;
19300     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
19301       break;
19302     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
19303     // Make sure we don't overlap with other intervals by checking the ones to
19304     // the left or right before inserting.
19305     auto I = Intervals.find(Offset);
19306     // If there's a next interval, we should end before it.
19307     if (I != Intervals.end() && I.start() < (Offset + Length))
19308       break;
19309     // If there's a previous interval, we should start after it.
19310     if (I != Intervals.begin() && (--I).stop() <= Offset)
19311       break;
19312     Intervals.insert(Offset, Offset + Length, Unit);
19313
19314     ChainedStores.push_back(Chain);
19315     STChain = Chain;
19316   }
19317
19318   // If we didn't find a chained store, exit.
19319   if (ChainedStores.size() == 0)
19320     return false;
19321
19322   // Improve all chained stores (St and ChainedStores members) starting from
19323   // where the store chain ended and return single TokenFactor.
19324   SDValue NewChain = STChain->getChain();
19325   SmallVector<SDValue, 8> TFOps;
19326   for (unsigned I = ChainedStores.size(); I;) {
19327     StoreSDNode *S = ChainedStores[--I];
19328     SDValue BetterChain = FindBetterChain(S, NewChain);
19329     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
19330         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
19331     TFOps.push_back(SDValue(S, 0));
19332     ChainedStores[I] = S;
19333   }
19334
19335   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
19336   SDValue BetterChain = FindBetterChain(St, NewChain);
19337   SDValue NewST;
19338   if (St->isTruncatingStore())
19339     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
19340                               St->getBasePtr(), St->getMemoryVT(),
19341                               St->getMemOperand());
19342   else
19343     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
19344                          St->getBasePtr(), St->getMemOperand());
19345
19346   TFOps.push_back(NewST);
19347
19348   // If we improved every element of TFOps, then we've lost the dependence on
19349   // NewChain to successors of St and we need to add it back to TFOps. Do so at
19350   // the beginning to keep relative order consistent with FindBetterChains.
19351   auto hasImprovedChain = [&](SDValue ST) -> bool {
19352     return ST->getOperand(0) != NewChain;
19353   };
19354   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
19355   if (AddNewChain)
19356     TFOps.insert(TFOps.begin(), NewChain);
19357
19358   SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
19359   CombineTo(St, TF);
19360
19361   AddToWorklist(STChain);
19362   // Add TF operands worklist in reverse order.
19363   for (auto I = TF->getNumOperands(); I;)
19364     AddToWorklist(TF->getOperand(--I).getNode());
19365   AddToWorklist(TF.getNode());
19366   return true;
19367 }
19368
19369 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
19370   if (OptLevel == CodeGenOpt::None)
19371     return false;
19372
19373   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19374
19375   // We must have a base and an offset.
19376   if (!BasePtr.getBase().getNode())
19377     return false;
19378
19379   // Do not handle stores to undef base pointers.
19380   if (BasePtr.getBase().isUndef())
19381     return false;
19382
19383   // Directly improve a chain of disjoint stores starting at St.
19384   if (parallelizeChainedStores(St))
19385     return true;
19386
19387   // Improve St's Chain..
19388   SDValue BetterChain = FindBetterChain(St, St->getChain());
19389   if (St->getChain() != BetterChain) {
19390     replaceStoreChain(St, BetterChain);
19391     return true;
19392   }
19393   return false;
19394 }
19395
19396 /// This is the entry point for the file.
19397 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
19398                            CodeGenOpt::Level OptLevel) {
19399   /// This is the main entry point to this class.
19400   DAGCombiner(*this, AA, OptLevel).Run(Level);
19401 }