contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallBitVector.h"
  28 #include "llvm/ADT/SmallPtrSet.h"
  29 #include "llvm/ADT/SmallSet.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/MemoryLocation.h"
  34 #include "llvm/CodeGen/DAGCombine.h"
  35 #include "llvm/CodeGen/ISDOpcodes.h"
  36 #include "llvm/CodeGen/MachineFrameInfo.h"
  37 #include "llvm/CodeGen/MachineFunction.h"
  38 #include "llvm/CodeGen/MachineMemOperand.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/SelectionDAG.h"
  41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  42 #include "llvm/CodeGen/SelectionDAGNodes.h"
  43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  44 #include "llvm/CodeGen/TargetLowering.h"
  45 #include "llvm/CodeGen/TargetRegisterInfo.h"
  46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  47 #include "llvm/CodeGen/ValueTypes.h"
  48 #include "llvm/IR/Attributes.h"
  49 #include "llvm/IR/Constant.h"
  50 #include "llvm/IR/DataLayout.h"
  51 #include "llvm/IR/DerivedTypes.h"
  52 #include "llvm/IR/Function.h"
  53 #include "llvm/IR/LLVMContext.h"
  54 #include "llvm/IR/Metadata.h"
  55 #include "llvm/Support/Casting.h"
  56 #include "llvm/Support/CodeGen.h"
  57 #include "llvm/Support/CommandLine.h"
  58 #include "llvm/Support/Compiler.h"
  59 #include "llvm/Support/Debug.h"
  60 #include "llvm/Support/ErrorHandling.h"
  61 #include "llvm/Support/KnownBits.h"
  62 #include "llvm/Support/MachineValueType.h"
  63 #include "llvm/Support/MathExtras.h"
  64 #include "llvm/Support/raw_ostream.h"
  65 #include "llvm/Target/TargetMachine.h"
  66 #include "llvm/Target/TargetOptions.h"
  67 #include <algorithm>
  68 #include <cassert>
  69 #include <cstdint>
  70 #include <functional>
  71 #include <iterator>
  72 #include <string>
  73 #include <tuple>
  74 #include <utility>
  75
  76 using namespace llvm;
  77
  78 #define DEBUG_TYPE "dagcombine"
  79
  80 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  83 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  84 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  85 STATISTIC(SlicedLoads, "Number of load sliced");
  86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  87
  88 static cl::opt<bool> // No cl::init given, unlike the other bool options here.
  89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  90                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  91
  92 static cl::opt<bool> // Enabled unless overridden: cl::init(true).
  93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  94         cl::desc("Enable DAG combiner's use of TBAA"));
  95
  96 #ifndef NDEBUG // This option is only declared when NDEBUG is undefined.
  97 static cl::opt<std::string>
  98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  99                    cl::desc("Only use DAG-combiner alias analysis in this"
 100                             " function"));
 101 #endif // NDEBUG
 102
 103 /// Hidden option to stress test load slicing: when it is enabled, load
 104 /// slicing bypasses most of its profitability guards. Off by default.
 105 static cl::opt<bool>
 106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 107                   cl::desc("Bypass the profitability model of load slicing"),
 108                   cl::init(false));
 109
 110 static cl::opt<bool> // Enabled unless overridden: cl::init(true).
 111   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 112                     cl::desc("DAG combiner may split indexing from loads"));
 113
 114 static cl::opt<unsigned> TokenFactorInlineLimit( // Defaults to 2048.
 115     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
 116     cl::desc("Limit the number of operands to inline for Token Factors"));
 117
 118 namespace {
 119
 120   class DAGCombiner {
 121     SelectionDAG &DAG; // The DAG being combined; exposed through getDAG().
 122     const TargetLowering &TLI; // Obtained from DAG.getTargetLoweringInfo().
 123     CombineLevel Level; // Initialized to BeforeLegalizeTypes.
 124     CodeGenOpt::Level OptLevel;
 125     bool LegalOperations = false;
 126     bool LegalTypes = false;
 127     bool ForCodeSize; // Set in the constructor from the function's hasOptSize().
 128
 129     /// Worklist of all of the nodes that need to be simplified.
 130     ///
 131     /// This must behave as a stack -- new nodes to process are pushed onto the
 132     /// back and when processing we pop off of the back.
 133     ///
 134     /// The worklist will not contain duplicates but may contain null entries
 135     /// due to nodes being deleted from the underlying DAG.
 136     SmallVector<SDNode *, 64> Worklist;
 137
 138     /// Mapping from an SDNode to its position on the worklist.
 139     ///
 140     /// This is used to find and remove nodes from the worklist (by nulling
 141     /// them) when they are deleted from the underlying DAG. It relies on
 142     /// stable indices of nodes within the worklist.
 143     DenseMap<SDNode *, unsigned> WorklistMap;
 144     /// This records all nodes we attempted to add to the worklist since we
 145     /// considered a new worklist entry. As we do not add duplicate nodes to
 146     /// the worklist, this is different from the tail of the worklist.
 147     SmallSetVector<SDNode *, 32> PruningList;
 148
 149     /// Set of nodes which have been combined (at least once).
 150     ///
 151     /// This is used to allow us to reliably add any operands of a DAG node
 152     /// which have not yet been combined to the worklist.
 153     SmallPtrSet<SDNode *, 32> CombinedNodes;
 154
 155     // AA - Used for DAG load/store alias analysis; supplied to the constructor.
 156     AliasAnalysis *AA;
 157
 158     /// Queue every user of \p N for combining: now that N has been
 159     /// simplified, the nodes consuming it might simplify further as well.
 160     void AddUsersToWorklist(SDNode *N) {
 161       for (auto UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI)
 162         AddToWorklist(*UI);
 163     }
 164
 165     // Delete nodes that were recorded for pruning and have no uses left.
 166     // This is called after any visit to a node; a visit should also call it
 167     // after any failed combine which may have created a DAG node.
 168     void clearAddedDanglingWorklistEntries() {
 169       // Drain the pruning list from the back, deleting the dead candidates.
 170       while (!PruningList.empty()) {
 171         SDNode *Candidate = PruningList.pop_back_val();
 172         if (Candidate->use_empty())
 173           recursivelyDeleteUnusedNodes(Candidate);
 174       }
 175     }
 176
 177     SDNode *getNextWorklistEntry() {
 178       // Before we do any work, remove nodes that are not in use.
 179       clearAddedDanglingWorklistEntries();
 180       SDNode *N = nullptr;
 181       // The Worklist holds the SDNodes in order, but it may contain null
 182       // entries.
 183       while (!N && !Worklist.empty()) {
 184         N = Worklist.pop_back_val();
 185       }
 186
 187       if (N) {
 188         bool GoodWorklistEntry = WorklistMap.erase(N);
 189         (void)GoodWorklistEntry;
 190         assert(GoodWorklistEntry &&
 191                "Found a worklist entry without a corresponding map entry!");
 192       }
 193       return N;
 194     }
 195
 196     /// Call the node-specific visit routine that folds N's particular node type.
 197     SDValue visit(SDNode *N);
 198
 199   public:
 200     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 201         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 202           OptLevel(OL), AA(AA) {
 203       ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
 204
 205       MaximumLegalStoreInBits = 0;
 206       for (MVT VT : MVT::all_valuetypes())
 207         if (EVT(VT).isSimple() && VT != MVT::Other &&
 208             TLI.isTypeLegal(EVT(VT)) &&
 209             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 210           MaximumLegalStoreInBits = VT.getSizeInBits();
 211     }
 212
 213     void ConsiderForPruning(SDNode *N) {
 214       // Remember N as a pruning candidate; it is deleted later if unused.
 215       PruningList.insert(N);
 216     }
 217
 218     /// Add to the worklist making sure its instance is at the back (next to be
 219     /// processed.)
 220     void AddToWorklist(SDNode *N) {
 221       assert(N->getOpcode() != ISD::DELETED_NODE &&
 222              "Deleted Node added to Worklist");
 223
 224       // Skip handle nodes as they can't usefully be combined and confuse the
 225       // zero-use deletion strategy.
 226       if (N->getOpcode() == ISD::HANDLENODE)
 227         return;
 228
 229       ConsiderForPruning(N);
 230
 231       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 232         Worklist.push_back(N);
 233     }
 234
 235     /// Forget \p N: drop it from the combined set, pruning list and worklist.
 236     void removeFromWorklist(SDNode *N) {
 237       CombinedNodes.erase(N);
 238       PruningList.remove(N);
 239
 240       auto Slot = WorklistMap.find(N);
 241       if (Slot != WorklistMap.end()) {
 242         // Null the entry out instead of erasing it: erasing is a linear
 243         // operation, and the map relies on worklist indices staying stable.
 244         Worklist[Slot->second] = nullptr;
 245         WorklistMap.erase(Slot);
 246       }
 247     }
 248
 249     void deleteAndRecombine(SDNode *N);
 250     bool recursivelyDeleteUnusedNodes(SDNode *N);
 251
 252     /// Replaces all uses of the results of \p N with the NumTo values in To.
 253     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 254                       bool AddTo = true);
 255
 256     /// Single-value form: forwards Res as a one-element array (NumTo == 1).
 257     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 258       return CombineTo(N, &Res, 1, AddTo);
 259     }
 260
 261     /// Two-value form: forwards {Res0, Res1} as the To array (NumTo == 2).
 262     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 263                       bool AddTo = true) {
 264       SDValue To[] = { Res0, Res1 };
 265       return CombineTo(N, To, 2, AddTo);
 266     }
 267
 268     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 269
 270   private:
 271     unsigned MaximumLegalStoreInBits; // Widest legal type in bits; set by ctor.
 272
 273     /// Try to simplify the integer value \p Op, or the things it uses, by bit
 274     /// propagation, with every bit of its scalar width demanded.
 275     /// \returns true if so.
 276     bool SimplifyDemandedBits(SDValue Op) {
 277       // Build an all-ones mask and defer to the overload taking a mask.
 278       APInt AllOnes = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
 279       return SimplifyDemandedBits(Op, AllOnes);
 280     }
 281
 282     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
 283       EVT OpVT = Op.getValueType(); // A scalar counts as a single element.
 284       unsigned EltCount = OpVT.isVector() ? OpVT.getVectorNumElements() : 1;
 285       return SimplifyDemandedBits(Op, DemandedBits,
 286                                   APInt::getAllOnesValue(EltCount));
 287     }
 288
 289     /// Try to simplify the vector value \p Op, or the things it uses, based on
 290     /// which of its elements are needed; this form demands all of them.
 291     /// \returns true if so.
 292     bool SimplifyDemandedVectorElts(SDValue Op) {
 293       const EVT VecVT = Op.getValueType();
 294       APInt AllElts = APInt::getAllOnesValue(VecVT.getVectorNumElements());
 295       return SimplifyDemandedVectorElts(Op, AllElts);
 296     }
 297
 298     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 299                               const APInt &DemandedElts);
 300     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
 301                                     bool AssumeSingleUse = false);
 302
 303     bool CombineToPreIndexedLoadStore(SDNode *N);
 304     bool CombineToPostIndexedLoadStore(SDNode *N);
 305     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 306     bool SliceUpLoad(SDNode *N);
 307
 308     // NOTE(review): "Scalars have size 0 to distinguish from singleton vectors" looks stale here - confirm.
 309     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 310     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 311     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 312
 313     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 314     ///   load.
 315     ///
 316     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 317     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 318     /// \param EltNo index of the vector element to load.
 319     /// \param OriginalLoad load that EVE came from to be replaced.
 320     /// \returns EVE on success, SDValue() on failure.
 321     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 322                                          SDValue EltNo,
 323                                          LoadSDNode *OriginalLoad);
 324     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 325     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 326     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 327     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 328     SDValue PromoteIntBinOp(SDValue Op);
 329     SDValue PromoteIntShiftOp(SDValue Op);
 330     SDValue PromoteExtend(SDValue Op);
 331     bool PromoteLoad(SDValue Op);
 332
 333     /// Call the node-specific routine that knows how to fold each
 334     /// particular type of node. If that doesn't do anything, try the
 335     /// target-specific DAG combines.
 336     SDValue combine(SDNode *N);
 337
 338     // Visitation implementation - Implement DAG node combining for the
 339     // different node types.  Every visit routine follows the same contract
 340     // for the SDValue it returns:
 341     //   getNode() == nullptr - No change was made.
 342     //   getNode() == N       - N was replaced, is dead and has been handled.
 343     //   otherwise            - N should be replaced by the returned operand.
 344     //
 345     SDValue visitTokenFactor(SDNode *N);
 346     SDValue visitMERGE_VALUES(SDNode *N);
 347     SDValue visitADD(SDNode *N);
 348     SDValue visitADDLike(SDNode *N);
 349     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
 350     SDValue visitSUB(SDNode *N);
 351     SDValue visitADDSAT(SDNode *N);
 352     SDValue visitSUBSAT(SDNode *N);
 353     SDValue visitADDC(SDNode *N);
 354     SDValue visitADDO(SDNode *N);
 355     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 356     SDValue visitSUBC(SDNode *N);
 357     SDValue visitSUBO(SDNode *N);
 358     SDValue visitADDE(SDNode *N);
 359     SDValue visitADDCARRY(SDNode *N);
 360     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 361     SDValue visitSUBE(SDNode *N);
 362     SDValue visitSUBCARRY(SDNode *N);
 363     SDValue visitMUL(SDNode *N);
 364     SDValue useDivRem(SDNode *N);
 365     SDValue visitSDIV(SDNode *N);
 366     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 367     SDValue visitUDIV(SDNode *N);
 368     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 369     SDValue visitREM(SDNode *N);
 370     SDValue visitMULHU(SDNode *N);
 371     SDValue visitMULHS(SDNode *N);
 372     SDValue visitSMUL_LOHI(SDNode *N);
 373     SDValue visitUMUL_LOHI(SDNode *N);
 374     SDValue visitMULO(SDNode *N);
 375     SDValue visitIMINMAX(SDNode *N);
 376     SDValue visitAND(SDNode *N);
 377     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 378     SDValue visitOR(SDNode *N);
 379     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 380     SDValue visitXOR(SDNode *N);
 381     SDValue SimplifyVBinOp(SDNode *N);
 382     SDValue visitSHL(SDNode *N);
 383     SDValue visitSRA(SDNode *N);
 384     SDValue visitSRL(SDNode *N);
 385     SDValue visitFunnelShift(SDNode *N);
 386     SDValue visitRotate(SDNode *N);
 387     SDValue visitABS(SDNode *N);
 388     SDValue visitBSWAP(SDNode *N);
 389     SDValue visitBITREVERSE(SDNode *N);
 390     SDValue visitCTLZ(SDNode *N);
 391     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 392     SDValue visitCTTZ(SDNode *N);
 393     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 394     SDValue visitCTPOP(SDNode *N);
 395     SDValue visitSELECT(SDNode *N);
 396     SDValue visitVSELECT(SDNode *N);
 397     SDValue visitSELECT_CC(SDNode *N);
 398     SDValue visitSETCC(SDNode *N);
 399     SDValue visitSETCCCARRY(SDNode *N);
 400     SDValue visitSIGN_EXTEND(SDNode *N);
 401     SDValue visitZERO_EXTEND(SDNode *N);
 402     SDValue visitANY_EXTEND(SDNode *N);
 403     SDValue visitAssertExt(SDNode *N);
 404     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 405     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 406     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 407     SDValue visitTRUNCATE(SDNode *N);
 408     SDValue visitBITCAST(SDNode *N);
 409     SDValue visitBUILD_PAIR(SDNode *N);
 410     SDValue visitFADD(SDNode *N);
 411     SDValue visitFSUB(SDNode *N);
 412     SDValue visitFMUL(SDNode *N);
 413     SDValue visitFMA(SDNode *N);
 414     SDValue visitFDIV(SDNode *N);
 415     SDValue visitFREM(SDNode *N);
 416     SDValue visitFSQRT(SDNode *N);
 417     SDValue visitFCOPYSIGN(SDNode *N);
 418     SDValue visitFPOW(SDNode *N);
 419     SDValue visitSINT_TO_FP(SDNode *N);
 420     SDValue visitUINT_TO_FP(SDNode *N);
 421     SDValue visitFP_TO_SINT(SDNode *N);
 422     SDValue visitFP_TO_UINT(SDNode *N);
 423     SDValue visitFP_ROUND(SDNode *N);
 424     SDValue visitFP_ROUND_INREG(SDNode *N);
 425     SDValue visitFP_EXTEND(SDNode *N);
 426     SDValue visitFNEG(SDNode *N);
 427     SDValue visitFABS(SDNode *N);
 428     SDValue visitFCEIL(SDNode *N);
 429     SDValue visitFTRUNC(SDNode *N);
 430     SDValue visitFFLOOR(SDNode *N);
 431     SDValue visitFMINNUM(SDNode *N);
 432     SDValue visitFMAXNUM(SDNode *N);
 433     SDValue visitFMINIMUM(SDNode *N);
 434     SDValue visitFMAXIMUM(SDNode *N);
 435     SDValue visitBRCOND(SDNode *N);
 436     SDValue visitBR_CC(SDNode *N);
 437     SDValue visitLOAD(SDNode *N);
 438
 439     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 440     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 441
 442     SDValue visitSTORE(SDNode *N);
 443     SDValue visitLIFETIME_END(SDNode *N);
 444     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 445     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 446     SDValue visitBUILD_VECTOR(SDNode *N);
 447     SDValue visitCONCAT_VECTORS(SDNode *N);
 448     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 449     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 450     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 451     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 452     SDValue visitMLOAD(SDNode *N);
 453     SDValue visitMSTORE(SDNode *N);
 454     SDValue visitMGATHER(SDNode *N);
 455     SDValue visitMSCATTER(SDNode *N);
 456     SDValue visitFP_TO_FP16(SDNode *N);
 457     SDValue visitFP16_TO_FP(SDNode *N);
 458     SDValue visitVECREDUCE(SDNode *N);
 459
 460     SDValue visitFADDForFMACombine(SDNode *N);
 461     SDValue visitFSUBForFMACombine(SDNode *N);
 462     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 463
 464     SDValue XformToShuffleWithZero(SDNode *N);
 465     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
 466                                                     const SDLoc &DL, SDValue N0,
 467                                                     SDValue N1);
 468     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
 469                                       SDValue N1);
 470     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 471                            SDValue N1, SDNodeFlags Flags);
 472
 473     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 474
 475     SDValue foldSelectOfConstants(SDNode *N);
 476     SDValue foldVSelectOfConstants(SDNode *N);
 477     SDValue foldBinOpIntoSelect(SDNode *BO);
 478     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 479     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 480     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 481     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 482                              SDValue N2, SDValue N3, ISD::CondCode CC,
 483                              bool NotExtCompare = false);
 484     SDValue convertSelectOfFPConstantsToLoadOffset(
 485         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 486         ISD::CondCode CC);
 487     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 488                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 489     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 490                               const SDLoc &DL);
 491     SDValue unfoldMaskedMerge(SDNode *N);
 492     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 493     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 494                           const SDLoc &DL, bool foldBooleans);
 495     SDValue rebuildSetCC(SDValue N);
 496
 497     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 498                            SDValue &CC) const;
 499     bool isOneUseSetCC(SDValue N) const;
 500
 501     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 502                                          unsigned HiOp);
 503     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 504     SDValue CombineExtLoad(SDNode *N);
 505     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 506     SDValue combineRepeatedFPDivisors(SDNode *N);
 507     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 508     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 509     SDValue BuildSDIV(SDNode *N);
 510     SDValue BuildSDIVPow2(SDNode *N);
 511     SDValue BuildUDIV(SDNode *N);
 512     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 513     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
 514     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 515     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 516     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 517     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 518                                 SDNodeFlags Flags, bool Reciprocal);
 519     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 520                                 SDNodeFlags Flags, bool Reciprocal);
 521     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 522                                bool DemandHighBits = true);
 523     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 524     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 525                               SDValue InnerPos, SDValue InnerNeg,
 526                               unsigned PosOpcode, unsigned NegOpcode,
 527                               const SDLoc &DL);
 528     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 529     SDValue MatchLoadCombine(SDNode *N);
 530     SDValue MatchStoreCombine(StoreSDNode *N);
 531     SDValue ReduceLoadWidth(SDNode *N);
 532     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 533     SDValue splitMergedValStore(StoreSDNode *ST);
 534     SDValue TransformFPLoadStorePair(SDNode *N);
 535     SDValue convertBuildVecZextToZext(SDNode *N);
 536     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 537     SDValue reduceBuildVecToShuffle(SDNode *N);
 538     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 539                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 540                                   SDValue VecIn2, unsigned LeftIdx,
 541                                   bool DidSplitVec);
 542     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 543
 544     /// Walk up chain skipping non-aliasing memory nodes,
 545     /// looking for aliasing nodes and adding them to the Aliases vector.
 546     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 547                           SmallVectorImpl<SDValue> &Aliases);
 548
 549     /// Return true if there is any possibility that the two addresses overlap.
 550     bool isAlias(SDNode *Op0, SDNode *Op1) const;
 551
 552     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 553     /// chain (aliasing node.)
 554     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 555
 556     /// Try to replace a store and any possibly adjacent stores on
 557     /// consecutive chains with better chains. Return true only if St is
 558     /// replaced.
 559     ///
 560     /// Notice that other chains may still be replaced even if the function
 561     /// returns false.
 562     bool findBetterNeighborChains(StoreSDNode *St);
 563
 564     // Helper for findBetterNeighborChains. Walk up store chain add additional
 565     // chained stores that do not overlap and can be parallelized.
 566     bool parallelizeChainedStores(StoreSDNode *St);
 567
 568     /// Pairs a pointer to an LSBaseSDNode with information on where it is
 569     /// located in a sequence of memory operations connected by a chain.
 570     struct MemOpLink {
 571       // The memory node itself.
 572       LSBaseSDNode *MemNode;
 573
 574       // Offset of the node from the base pointer.
 575       int64_t OffsetFromBase;
 576
 577       MemOpLink(LSBaseSDNode *Node, int64_t BaseOffset)
 578           : MemNode(Node), OffsetFromBase(BaseOffset) {}
 579     };
 580
 581     /// This is a helper function for visitMUL to check the profitability
 582     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 583     /// MulNode is the original multiply, AddNode is (add x, c1),
 584     /// and ConstNode is c2.
 585     bool isMulAddWithConstProfitable(SDNode *MulNode,
 586                                      SDValue &AddNode,
 587                                      SDValue &ConstNode);
 588
 589     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 590     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 591     /// the type of the loaded value to be extended.
 592     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 593                           EVT LoadResultTy, EVT &ExtVT);
 594
 595     /// Helper function to calculate whether the given Load/Store can have its
 596     /// width reduced to ExtVT.
 597     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 598                            EVT &MemVT, unsigned ShAmt = 0);
 599
 600     /// Used by BackwardsPropagateMask to find suitable loads.
 601     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 602                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 603                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 604     /// Attempt to propagate a given AND node back to load leaves so that they
 605     /// can be combined into narrow loads.
 606     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 607
 608     /// Helper function for MergeConsecutiveStores which merges the
 609     /// component store chains.
 610     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 611                                 unsigned NumStores);
 612
 613     /// This is a helper function for MergeConsecutiveStores. When the
 614     /// source elements of the consecutive stores are all constants or
 615     /// all extracted vector elements, try to merge them into one
 616     /// larger store introducing bitcasts if necessary.  \return True
 617     /// if a merged store was created.
 618     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 619                                          EVT MemVT, unsigned NumStores,
 620                                          bool IsConstantSrc, bool UseVector,
 621                                          bool UseTrunc);
 622
 623     /// This is a helper function for MergeConsecutiveStores. Stores
 624     /// that potentially may be merged with St are placed in
 625     /// StoreNodes. RootNode is a chain predecessor to all store
 626     /// candidates.
 627     void getStoreMergeCandidates(StoreSDNode *St,
 628                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 629                                  SDNode *&Root);
 630
 631     /// Helper function for MergeConsecutiveStores. Checks if
 632     /// candidate stores have indirect dependency through their
 633     /// operands. RootNode is the predecessor to all stores calculated
 634     /// by getStoreMergeCandidates and is used to prune the dependency check.
 635     /// \return True if safe to merge.
 636     bool checkMergeStoreCandidatesForDependencies(
 637         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 638         SDNode *RootNode);
 639
 640     /// Merge consecutive store operations into a wide store.
 641     /// This optimization uses wide integers or vectors when possible.
 642     /// \return presumably true if stores were merged into a wider store (the
 643     /// result is a bool, not a count - TODO confirm against the definition).
 644     bool MergeConsecutiveStores(StoreSDNode *St);
 645
 646     /// Try to transform a truncation where C is a constant:
 647     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 648     ///
 649     /// \p N needs to be a truncation and its first operand an AND. Other
 650     /// requirements are checked by the function (e.g. that trunc is
 651     /// single-use) and if missed an empty SDValue is returned.
 652     SDValue distributeTruncateThroughAnd(SDNode *N);
 653
 654     /// Tell whether the target supports the operation \p Opcode for type
 655     /// \p VT. Before operations are legalized this accepts an operation that
 656     /// is legal or custom; once LegalOperations is set, the operation has to
 657     /// be legal (custom does not count).
 658     bool hasOperation(unsigned Opcode, EVT VT) {
 659       if (!LegalOperations)
 660         return TLI.isOperationLegalOrCustom(Opcode, VT);
 661       return TLI.isOperationLegal(Opcode, VT);
 662     }
 663
 664   public:
 665     /// Runs the DAG combiner on all nodes in the worklist.
 666     void Run(CombineLevel AtLevel);
 667
 668     SelectionDAG &getDAG() const { return DAG; } // DAG this combiner works on.
 669
 670     /// Returns a type large enough to hold any valid shift amount - before type
 671     /// legalization these can be huge. \p LHSTy must be an integer type.
 672     EVT getShiftAmountTy(EVT LHSTy) {
 673       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 674       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 675     }
 676
 677     /// This method returns true if we are running before type legalization or
 678     /// if the specified VT is legal.
 679     bool isTypeLegal(const EVT &VT) {
 680       if (!LegalTypes) return true;
 681       return TLI.isTypeLegal(VT);
 682     }
 683
 684     /// Convenience wrapper around TargetLowering::getSetCCResultType
 685     EVT getSetCCResultType(EVT VT) const {
 686       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 687     }
 688
    /// NOTE(review): defined out of line. Judging by the parameter names this
    /// updates the setcc nodes in \p SetCCs that use \p OrigLoad so that they
    /// use \p ExtLoad, extended according to \p ExtType - confirm against the
    /// definition.
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
 692   };
 693
 694 /// This class is a DAGUpdateListener that removes any deleted
 695 /// nodes from the worklist.
 696 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 697   DAGCombiner &DC;
 698
 699 public:
 700   explicit WorklistRemover(DAGCombiner &dc)
 701     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 702
 703   void NodeDeleted(SDNode *N, SDNode *E) override {
 704     DC.removeFromWorklist(N);
 705   }
 706 };
 707
 708 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
 709   DAGCombiner &DC;
 710
 711 public:
 712   explicit WorklistInserter(DAGCombiner &dc)
 713       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 714
 715   // FIXME: Ideally we could add N to the worklist, but this causes exponential
 716   //        compile time costs in large DAGs, e.g. Halide.
 717   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 718 };
 719
 720 } // end anonymous namespace
 721
 722 //===----------------------------------------------------------------------===//
 723 //  TargetLowering::DAGCombinerInfo implementation
 724 //===----------------------------------------------------------------------===//
 725
 726 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 727   ((DAGCombiner*)DC)->AddToWorklist(N);
 728 }
 729
 730 SDValue TargetLowering::DAGCombinerInfo::
 731 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 732   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 733 }
 734
 735 SDValue TargetLowering::DAGCombinerInfo::
 736 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 737   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 738 }
 739
 740 SDValue TargetLowering::DAGCombinerInfo::
 741 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 742   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 743 }
 744
 745 void TargetLowering::DAGCombinerInfo::
 746 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 747   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 748 }
 749
 750 //===----------------------------------------------------------------------===//
 751 // Helper Functions
 752 //===----------------------------------------------------------------------===//
 753
 754 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 755   removeFromWorklist(N);
 756
 757   // If the operands of this node are only used by the node, they will now be
 758   // dead. Make sure to re-visit them and recursively delete dead nodes.
 759   for (const SDValue &Op : N->ops())
 760     // For an operand generating multiple values, one of the values may
 761     // become dead allowing further simplification (e.g. split index
 762     // arithmetic from an indexed load).
 763     if (Op->hasOneUse() || Op->getNumValues() > 1)
 764       AddToWorklist(Op.getNode());
 765
 766   DAG.DeleteNode(N);
 767 }
 768
 769 /// Return 1 if we can compute the negated form of the specified expression for
 770 /// the same cost as the expression itself, or 2 if we can compute the negated
 771 /// form more cheaply than the expression itself.
 772 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 773                                const TargetLowering &TLI,
 774                                const TargetOptions *Options,
 775                                bool ForCodeSize,
 776                                unsigned Depth = 0) {
 777   // fneg is removable even if it has multiple uses.
 778   if (Op.getOpcode() == ISD::FNEG)
 779     return 2;
 780
 781   // Don't allow anything with multiple uses unless we know it is free.
 782   EVT VT = Op.getValueType();
 783   const SDNodeFlags Flags = Op->getFlags();
 784   if (!Op.hasOneUse() &&
 785       !(Op.getOpcode() == ISD::FP_EXTEND &&
 786         TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
 787     return 0;
 788
 789   // Don't recurse exponentially.
 790   if (Depth > 6)
 791     return 0;
 792
 793   switch (Op.getOpcode()) {
 794   default: return false;
 795   case ISD::ConstantFP: {
 796     if (!LegalOperations)
 797       return 1;
 798
 799     // Don't invert constant FP values after legalization unless the target says
 800     // the negated constant is legal.
 801     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 802            TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
 803                             ForCodeSize);
 804   }
 805   case ISD::BUILD_VECTOR: {
 806     // Only permit BUILD_VECTOR of constants.
 807     if (llvm::any_of(Op->op_values(), [&](SDValue N) {
 808           return !N.isUndef() && !isa<ConstantFPSDNode>(N);
 809         }))
 810       return 0;
 811     if (!LegalOperations)
 812       return 1;
 813     if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
 814         TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
 815       return 1;
 816     return llvm::all_of(Op->op_values(), [&](SDValue N) {
 817       return N.isUndef() ||
 818              TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
 819                               ForCodeSize);
 820     });
 821   }
 822   case ISD::FADD:
 823     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
 824       return 0;
 825
 826     // After operation legalization, it might not be legal to create new FSUBs.
 827     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
 828       return 0;
 829
 830     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 831     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 832                                     Options, ForCodeSize, Depth + 1))
 833       return V;
 834     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 835     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 836                               ForCodeSize, Depth + 1);
 837   case ISD::FSUB:
 838     // We can't turn -(A-B) into B-A when we honor signed zeros.
 839     if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 840       return 0;
 841
 842     // fold (fneg (fsub A, B)) -> (fsub B, A)
 843     return 1;
 844
 845   case ISD::FMUL:
 846   case ISD::FDIV:
 847     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 848     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 849                                     Options, ForCodeSize, Depth + 1))
 850       return V;
 851
 852     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 853                               ForCodeSize, Depth + 1);
 854
 855   case ISD::FP_EXTEND:
 856   case ISD::FP_ROUND:
 857   case ISD::FSIN:
 858     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 859                               ForCodeSize, Depth + 1);
 860   }
 861 }
 862
 863 /// If isNegatibleForFree returns true, return the newly negated expression.
 864 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 865                                     bool LegalOperations, bool ForCodeSize,
 866                                     unsigned Depth = 0) {
 867   // fneg is removable even if it has multiple uses.
 868   if (Op.getOpcode() == ISD::FNEG)
 869     return Op.getOperand(0);
 870
 871   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 872   const TargetOptions &Options = DAG.getTarget().Options;
 873   const SDNodeFlags Flags = Op->getFlags();
 874
 875   switch (Op.getOpcode()) {
 876   default: llvm_unreachable("Unknown code");
 877   case ISD::ConstantFP: {
 878     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 879     V.changeSign();
 880     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 881   }
 882   case ISD::BUILD_VECTOR: {
 883     SmallVector<SDValue, 4> Ops;
 884     for (SDValue C : Op->op_values()) {
 885       if (C.isUndef()) {
 886         Ops.push_back(C);
 887         continue;
 888       }
 889       APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
 890       V.changeSign();
 891       Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
 892     }
 893     return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
 894   }
 895   case ISD::FADD:
 896     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
 897
 898     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 899     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 900                            DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
 901                            Depth + 1))
 902       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 903                          GetNegatedExpression(Op.getOperand(0), DAG,
 904                                               LegalOperations, ForCodeSize,
 905                                               Depth + 1),
 906                          Op.getOperand(1), Flags);
 907     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 908     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 909                        GetNegatedExpression(Op.getOperand(1), DAG,
 910                                             LegalOperations, ForCodeSize,
 911                                             Depth + 1),
 912                        Op.getOperand(0), Flags);
 913   case ISD::FSUB:
 914     // fold (fneg (fsub 0, B)) -> B
 915     if (ConstantFPSDNode *N0CFP =
 916             isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
 917       if (N0CFP->isZero())
 918         return Op.getOperand(1);
 919
 920     // fold (fneg (fsub A, B)) -> (fsub B, A)
 921     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 922                        Op.getOperand(1), Op.getOperand(0), Flags);
 923
 924   case ISD::FMUL:
 925   case ISD::FDIV:
 926     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 927     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 928                            DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
 929                            Depth + 1))
 930       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 931                          GetNegatedExpression(Op.getOperand(0), DAG,
 932                                               LegalOperations, ForCodeSize,
 933                                               Depth + 1),
 934                          Op.getOperand(1), Flags);
 935
 936     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 937     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 938                        Op.getOperand(0),
 939                        GetNegatedExpression(Op.getOperand(1), DAG,
 940                                             LegalOperations, ForCodeSize,
 941                                             Depth + 1), Flags);
 942
 943   case ISD::FP_EXTEND:
 944   case ISD::FSIN:
 945     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 946                        GetNegatedExpression(Op.getOperand(0), DAG,
 947                                             LegalOperations, ForCodeSize,
 948                                             Depth + 1));
 949   case ISD::FP_ROUND:
 950     return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 951                        GetNegatedExpression(Op.getOperand(0), DAG,
 952                                             LegalOperations, ForCodeSize,
 953                                             Depth + 1),
 954                        Op.getOperand(1));
 955   }
 956 }
 957
 958 // APInts must be the same size for most operations, this helper
 959 // function zero extends the shorter of the pair so that they match.
 960 // We provide an Offset so that we can create bitwidths that won't overflow.
 961 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 962   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 963   LHS = LHS.zextOrSelf(Bits);
 964   RHS = RHS.zextOrSelf(Bits);
 965 }
 966
 967 // Return true if this node is a setcc, or is a select_cc
 968 // that selects between the target values used for true and false, making it
 969 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 970 // the appropriate nodes based on the type of node we are checking. This
 971 // simplifies life a bit for the callers.
 972 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 973                                     SDValue &CC) const {
 974   if (N.getOpcode() == ISD::SETCC) {
 975     LHS = N.getOperand(0);
 976     RHS = N.getOperand(1);
 977     CC  = N.getOperand(2);
 978     return true;
 979   }
 980
 981   if (N.getOpcode() != ISD::SELECT_CC ||
 982       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 983       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 984     return false;
 985
 986   if (TLI.getBooleanContents(N.getValueType()) ==
 987       TargetLowering::UndefinedBooleanContent)
 988     return false;
 989
 990   LHS = N.getOperand(0);
 991   RHS = N.getOperand(1);
 992   CC  = N.getOperand(4);
 993   return true;
 994 }
 995
 996 /// Return true if this is a SetCC-equivalent operation with only one use.
 997 /// If this is true, it allows the users to invert the operation for free when
 998 /// it is profitable to do so.
 999 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1000   SDValue N0, N1, N2;
1001   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
1002     return true;
1003   return false;
1004 }
1005
1006 // Returns the SDNode if it is a constant float BuildVector
1007 // or constant float.
1008 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
1009   if (isa<ConstantFPSDNode>(N))
1010     return N.getNode();
1011   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
1012     return N.getNode();
1013   return nullptr;
1014 }
1015
1016 // Determines if it is a constant integer or a build vector of constant
1017 // integers (and undefs).
1018 // Do not permit build vector implicit truncation.
1019 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1020   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1021     return !(Const->isOpaque() && NoOpaques);
1022   if (N.getOpcode() != ISD::BUILD_VECTOR)
1023     return false;
1024   unsigned BitWidth = N.getScalarValueSizeInBits();
1025   for (const SDValue &Op : N->op_values()) {
1026     if (Op.isUndef())
1027       continue;
1028     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1029     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1030         (Const->isOpaque() && NoOpaques))
1031       return false;
1032   }
1033   return true;
1034 }
1035
1036 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1037 // undef's.
1038 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1039   if (V.getOpcode() != ISD::BUILD_VECTOR)
1040     return false;
1041   return isConstantOrConstantVector(V, NoOpaques) ||
1042          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1043 }
1044
1045 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1046                                                              const SDLoc &DL,
1047                                                              SDValue N0,
1048                                                              SDValue N1) {
1049   // Currently this only tries to ensure we don't undo the GEP splits done by
1050   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1051   // we check if the following transformation would be problematic:
1052   // (load/store (add, (add, x, offset1), offset2)) ->
1053   // (load/store (add, x, offset1+offset2)).
1054
1055   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1056     return false;
1057
1058   if (N0.hasOneUse())
1059     return false;
1060
1061   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1062   auto *C2 = dyn_cast<ConstantSDNode>(N1);
1063   if (!C1 || !C2)
1064     return false;
1065
1066   const APInt &C1APIntVal = C1->getAPIntValue();
1067   const APInt &C2APIntVal = C2->getAPIntValue();
1068   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1069     return false;
1070
1071   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1072   if (CombinedValueIntVal.getBitWidth() > 64)
1073     return false;
1074   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1075
1076   for (SDNode *Node : N0->uses()) {
1077     auto LoadStore = dyn_cast<MemSDNode>(Node);
1078     if (LoadStore) {
1079       // Is x[offset2] already not a legal addressing mode? If so then
1080       // reassociating the constants breaks nothing (we test offset2 because
1081       // that's the one we hope to fold into the load or store).
1082       TargetLoweringBase::AddrMode AM;
1083       AM.HasBaseReg = true;
1084       AM.BaseOffs = C2APIntVal.getSExtValue();
1085       EVT VT = LoadStore->getMemoryVT();
1086       unsigned AS = LoadStore->getAddressSpace();
1087       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1088       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1089         continue;
1090
1091       // Would x[offset1+offset2] still be a legal addressing mode?
1092       AM.BaseOffs = CombinedValue;
1093       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1094         return true;
1095     }
1096   }
1097
1098   return false;
1099 }
1100
1101 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1102 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1103 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1104                                                SDValue N0, SDValue N1) {
1105   EVT VT = N0.getValueType();
1106
1107   if (N0.getOpcode() != Opc)
1108     return SDValue();
1109
1110   // Don't reassociate reductions.
1111   if (N0->getFlags().hasVectorReduction())
1112     return SDValue();
1113
1114   if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1115     if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1116       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1117       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1118         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1119       return SDValue();
1120     }
1121     if (N0.hasOneUse()) {
1122       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1123       //              iff (op x, c1) has one use
1124       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1125       if (!OpNode.getNode())
1126         return SDValue();
1127       AddToWorklist(OpNode.getNode());
1128       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1129     }
1130   }
1131   return SDValue();
1132 }
1133
1134 // Try to reassociate commutative binops.
1135 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1136                                     SDValue N1, SDNodeFlags Flags) {
1137   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1138   // Don't reassociate reductions.
1139   if (Flags.hasVectorReduction())
1140     return SDValue();
1141
1142   // Floating-point reassociation is not allowed without loose FP math.
1143   if (N0.getValueType().isFloatingPoint() ||
1144       N1.getValueType().isFloatingPoint())
1145     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1146       return SDValue();
1147
1148   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1149     return Combined;
1150   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1151     return Combined;
1152   return SDValue();
1153 }
1154
1155 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1156                                bool AddTo) {
1157   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1158   ++NodesCombined;
1159   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1160              To[0].getNode()->dump(&DAG);
1161              dbgs() << " and " << NumTo - 1 << " other values\n");
1162   for (unsigned i = 0, e = NumTo; i != e; ++i)
1163     assert((!To[i].getNode() ||
1164             N->getValueType(i) == To[i].getValueType()) &&
1165            "Cannot combine value to value of different type!");
1166
1167   WorklistRemover DeadNodes(*this);
1168   DAG.ReplaceAllUsesWith(N, To);
1169   if (AddTo) {
1170     // Push the new nodes and any users onto the worklist
1171     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1172       if (To[i].getNode()) {
1173         AddToWorklist(To[i].getNode());
1174         AddUsersToWorklist(To[i].getNode());
1175       }
1176     }
1177   }
1178
1179   // Finally, if the node is now dead, remove it from the graph.  The node
1180   // may not be dead if the replacement process recursively simplified to
1181   // something else needing this node.
1182   if (N->use_empty())
1183     deleteAndRecombine(N);
1184   return SDValue(N, 0);
1185 }
1186
1187 void DAGCombiner::
1188 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1189   // Replace all uses.  If any nodes become isomorphic to other nodes and
1190   // are deleted, make sure to remove them from our worklist.
1191   WorklistRemover DeadNodes(*this);
1192   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1193
1194   // Push the new node and any (possibly new) users onto the worklist.
1195   AddToWorklist(TLO.New.getNode());
1196   AddUsersToWorklist(TLO.New.getNode());
1197
1198   // Finally, if the node is now dead, remove it from the graph.  The node
1199   // may not be dead if the replacement process recursively simplified to
1200   // something else needing this node.
1201   if (TLO.Old.getNode()->use_empty())
1202     deleteAndRecombine(TLO.Old.getNode());
1203 }
1204
1205 /// Check the specified integer node value to see if it can be simplified or if
1206 /// things it uses can be simplified by bit propagation. If so, return true.
1207 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1208                                        const APInt &DemandedElts) {
1209   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1210   KnownBits Known;
1211   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1212     return false;
1213
1214   // Revisit the node.
1215   AddToWorklist(Op.getNode());
1216
1217   // Replace the old value with the new one.
1218   ++NodesCombined;
1219   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1220              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1221              dbgs() << '\n');
1222
1223   CommitTargetLoweringOpt(TLO);
1224   return true;
1225 }
1226
1227 /// Check the specified vector node value to see if it can be simplified or
1228 /// if things it uses can be simplified as it only uses some of the elements.
1229 /// If so, return true.
1230 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1231                                              const APInt &DemandedElts,
1232                                              bool AssumeSingleUse) {
1233   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1234   APInt KnownUndef, KnownZero;
1235   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1236                                       TLO, 0, AssumeSingleUse))
1237     return false;
1238
1239   // Revisit the node.
1240   AddToWorklist(Op.getNode());
1241
1242   // Replace the old value with the new one.
1243   ++NodesCombined;
1244   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1245              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1246              dbgs() << '\n');
1247
1248   CommitTargetLoweringOpt(TLO);
1249   return true;
1250 }
1251
1252 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1253   SDLoc DL(Load);
1254   EVT VT = Load->getValueType(0);
1255   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1256
1257   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1258              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1259   WorklistRemover DeadNodes(*this);
1260   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1261   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1262   deleteAndRecombine(Load);
1263   AddToWorklist(Trunc.getNode());
1264 }
1265
1266 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1267   Replace = false;
1268   SDLoc DL(Op);
1269   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1270     LoadSDNode *LD = cast<LoadSDNode>(Op);
1271     EVT MemVT = LD->getMemoryVT();
1272     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1273                                                       : LD->getExtensionType();
1274     Replace = true;
1275     return DAG.getExtLoad(ExtType, DL, PVT,
1276                           LD->getChain(), LD->getBasePtr(),
1277                           MemVT, LD->getMemOperand());
1278   }
1279
1280   unsigned Opc = Op.getOpcode();
1281   switch (Opc) {
1282   default: break;
1283   case ISD::AssertSext:
1284     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1285       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1286     break;
1287   case ISD::AssertZext:
1288     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1289       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1290     break;
1291   case ISD::Constant: {
1292     unsigned ExtOpc =
1293       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1294     return DAG.getNode(ExtOpc, DL, PVT, Op);
1295   }
1296   }
1297
1298   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1299     return SDValue();
1300   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1301 }
1302
1303 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1304   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1305     return SDValue();
1306   EVT OldVT = Op.getValueType();
1307   SDLoc DL(Op);
1308   bool Replace = false;
1309   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1310   if (!NewOp.getNode())
1311     return SDValue();
1312   AddToWorklist(NewOp.getNode());
1313
1314   if (Replace)
1315     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1316   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1317                      DAG.getValueType(OldVT));
1318 }
1319
1320 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1321   EVT OldVT = Op.getValueType();
1322   SDLoc DL(Op);
1323   bool Replace = false;
1324   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1325   if (!NewOp.getNode())
1326     return SDValue();
1327   AddToWorklist(NewOp.getNode());
1328
1329   if (Replace)
1330     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1331   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1332 }
1333
1334 /// Promote the specified integer binary operation if the target indicates it is
1335 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1336 /// i32 since i16 instructions are longer.
1337 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1338   if (!LegalOperations)
1339     return SDValue();
1340
1341   EVT VT = Op.getValueType();
1342   if (VT.isVector() || !VT.isInteger())
1343     return SDValue();
1344
1345   // If operation type is 'undesirable', e.g. i16 on x86, consider
1346   // promoting it.
1347   unsigned Opc = Op.getOpcode();
1348   if (TLI.isTypeDesirableForOp(Opc, VT))
1349     return SDValue();
1350
1351   EVT PVT = VT;
1352   // Consult target whether it is a good idea to promote this operation and
1353   // what's the right type to promote it to.
1354   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1355     assert(PVT != VT && "Don't know what type to promote to!");
1356
1357     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1358
1359     bool Replace0 = false;
1360     SDValue N0 = Op.getOperand(0);
1361     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1362
1363     bool Replace1 = false;
1364     SDValue N1 = Op.getOperand(1);
1365     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1366     SDLoc DL(Op);
1367
1368     SDValue RV =
1369         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1370
1371     // We are always replacing N0/N1's use in N and only need
1372     // additional replacements if there are additional uses.
1373     Replace0 &= !N0->hasOneUse();
1374     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1375
1376     // Combine Op here so it is preserved past replacements.
1377     CombineTo(Op.getNode(), RV);
1378
1379     // If operands have a use ordering, make sure we deal with
1380     // predecessor first.
1381     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1382       std::swap(N0, N1);
1383       std::swap(NN0, NN1);
1384     }
1385
1386     if (Replace0) {
1387       AddToWorklist(NN0.getNode());
1388       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1389     }
1390     if (Replace1) {
1391       AddToWorklist(NN1.getNode());
1392       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1393     }
1394     return Op;
1395   }
1396   return SDValue();
1397 }
1398
1399 /// Promote the specified integer shift operation if the target indicates it is
1400 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1401 /// i32 since i16 instructions are longer.
///
/// Only scalar integer shifts are considered, and only when LegalOperations is
/// set. On success the result is a TRUNCATE (back to the original type) of the
/// same shift performed in the promoted type; in every other case a null
/// SDValue is returned.
1402 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1403   if (!LegalOperations)
1404     return SDValue();
1405
1406   EVT VT = Op.getValueType();
1407   if (VT.isVector() || !VT.isInteger())
1408     return SDValue();
1409
1410   // If operation type is 'undesirable', e.g. i16 on x86, consider
1411   // promoting it.
1412   unsigned Opc = Op.getOpcode();
1413   if (TLI.isTypeDesirableForOp(Opc, VT))
1414     return SDValue();
1415
1416   EVT PVT = VT;
1417   // Consult target whether it is a good idea to promote this operation and
1418   // what's the right type to promote it to.
1419   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1420     assert(PVT != VT && "Don't know what type to promote to!");
1421
1422     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1423
1424     bool Replace = false;
1425     SDValue N0 = Op.getOperand(0);
1426     SDValue N1 = Op.getOperand(1);
         // Only the shifted value (operand 0) is widened; the shift amount N1 is
         // reused unchanged. SRA and SRL widen through the sign-/zero-extending
         // helpers (presumably so the bits shifted in from the top are well
         // defined -- confirm against those helpers). Any other opcode goes
         // through PromoteOperand, which is handed the Replace flag tested below.
1427     if (Opc == ISD::SRA)
1428       N0 = SExtPromoteOperand(N0, PVT);
1429     else if (Opc == ISD::SRL)
1430       N0 = ZExtPromoteOperand(N0, PVT);
1431     else
1432       N0 = PromoteOperand(N0, PVT, Replace);
1433
         // Give up if the helper handed back a null value.
1434     if (!N0.getNode())
1435       return SDValue();
1436
1437     SDLoc DL(Op);
         // Perform the shift in the promoted type and truncate back to VT.
1438     SDValue RV =
1439         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1440
1441     AddToWorklist(N0.getNode());
1442     if (Replace)
1443       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1444
1445     // Deal with Op being deleted.
1446     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1447       return RV;
1448   }
1449   return SDValue();
1450 }
1451
1452 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1453   if (!LegalOperations)
1454     return SDValue();
1455
1456   EVT VT = Op.getValueType();
1457   if (VT.isVector() || !VT.isInteger())
1458     return SDValue();
1459
1460   // If operation type is 'undesirable', e.g. i16 on x86, consider
1461   // promoting it.
1462   unsigned Opc = Op.getOpcode();
1463   if (TLI.isTypeDesirableForOp(Opc, VT))
1464     return SDValue();
1465
1466   EVT PVT = VT;
1467   // Consult target whether it is a good idea to promote this operation and
1468   // what's the right type to promote it to.
1469   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1470     assert(PVT != VT && "Don't know what type to promote to!");
1471     // fold (aext (aext x)) -> (aext x)
1472     // fold (aext (zext x)) -> (zext x)
1473     // fold (aext (sext x)) -> (sext x)
1474     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1475     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1476   }
1477   return SDValue();
1478 }
1479
/// Try to replace the unindexed, scalar-integer load \p Op with an extending
/// load of the promoted type followed by a TRUNCATE back to the original type.
/// This is only done when LegalOperations is set, the target reports the
/// load's type as undesirable, and IsDesirableToPromoteOp accepts the
/// promotion.
///
/// \returns true if the load was replaced (the original node is handed to
/// deleteAndRecombine), false if nothing was changed.
1480 bool DAGCombiner::PromoteLoad(SDValue Op) {
1481   if (!LegalOperations)
1482     return false;
1483
1484   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1485     return false;
1486
1487   EVT VT = Op.getValueType();
1488   if (VT.isVector() || !VT.isInteger())
1489     return false;
1490
1491   // If operation type is 'undesirable', e.g. i16 on x86, consider
1492   // promoting it.
1493   unsigned Opc = Op.getOpcode();
1494   if (TLI.isTypeDesirableForOp(Opc, VT))
1495     return false;
1496
1497   EVT PVT = VT;
1498   // Consult target whether it is a good idea to promote this operation and
1499   // what's the right type to promote it to.
1500   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1501     assert(PVT != VT && "Don't know what type to promote to!");
1502
1503     SDLoc DL(Op);
1504     SDNode *N = Op.getNode();
1505     LoadSDNode *LD = cast<LoadSDNode>(N);
1506     EVT MemVT = LD->getMemoryVT();
         // A non-extending load becomes an EXTLOAD; a load that already extends
         // keeps its extension kind.
1507     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1508                                                       : LD->getExtensionType();
         // Same chain, pointer, memory type and memory operand as the original
         // load; only the result type is widened to PVT.
1509     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1510                                    LD->getChain(), LD->getBasePtr(),
1511                                    MemVT, LD->getMemOperand());
1512     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1513
1514     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1515                Result.getNode()->dump(&DAG); dbgs() << '\n');
1516     WorklistRemover DeadNodes(*this);
         // Result 0 of the old load is replaced by the truncate, result 1 by
         // result 1 of the new load.
1517     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1518     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1519     deleteAndRecombine(N);
1520     AddToWorklist(Result.getNode());
1521     return true;
1522   }
1523   return false;
1524 }
1525
1526 /// Recursively delete a node which has no uses and any operands for
1527 /// which it is the only use.
1528 ///
1529 /// Note that this both deletes the nodes and removes them from the worklist.
1530 /// It also adds any nodes who have had a user deleted to the worklist as they
1531 /// may now have only one use and subject to other combines.
///
/// \returns false (without changing anything) if \p N still has uses, true
/// otherwise.
1532 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1533   if (!N->use_empty())
1534     return false;
1535
       // Pending nodes; entries are taken from the back, so this is processed as
       // a stack.
1536   SmallSetVector<SDNode *, 16> Nodes;
1537   Nodes.insert(N);
1538   do {
1539     N = Nodes.pop_back_val();
         // Skip null entries (presumably an operand slot can hold a null node --
         // confirm).
1540     if (!N)
1541       continue;
1542
1543     if (N->use_empty()) {
           // Queue the operands before N is deleted: each of them is about to
           // lose a user and may become dead itself.
1544       for (const SDValue &ChildN : N->op_values())
1545         Nodes.insert(ChildN.getNode());
1546
1547       removeFromWorklist(N);
1548       DAG.DeleteNode(N);
1549     } else {
           // Still used elsewhere: put it on the worklist for another look.
1550       AddToWorklist(N);
1551     }
1552   } while (!Nodes.empty());
1553   return true;
1554 }
1555
1556 //===----------------------------------------------------------------------===//
1557 //  Main DAG Combiner implementation
1558 //===----------------------------------------------------------------------===//
1559
/// Run the combiner over the whole DAG at combine level \p AtLevel: seed the
/// worklist with every node, then repeatedly take a node, delete it if it is
/// dead, otherwise try to combine it and replace its uses with the result,
/// until the worklist is empty. The DAG root is updated at the end and dead
/// nodes are removed.
1560 void DAGCombiner::Run(CombineLevel AtLevel) {
1561   // set the instance variables, so that the various visit routines may use it.
1562   Level = AtLevel;
1563   LegalOperations = Level >= AfterLegalizeVectorOps;
1564   LegalTypes = Level >= AfterLegalizeTypes;
1565
1566   WorklistInserter AddNodes(*this);
1567
1568   // Add all the dag nodes to the worklist.
1569   for (SDNode &Node : DAG.allnodes())
1570     AddToWorklist(&Node);
1571
1572   // Create a dummy node (which is not added to allnodes), that adds a reference
1573   // to the root node, preventing it from being deleted, and tracking any
1574   // changes of the root.
1575   HandleSDNode Dummy(DAG.getRoot());
1576
1577   // While we have a valid worklist entry node, try to combine it.
1578   while (SDNode *N = getNextWorklistEntry()) {
1579     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1580     // N is deleted from the DAG, since they too may now be dead or may have a
1581     // reduced number of uses, allowing other xforms.
1582     if (recursivelyDeleteUnusedNodes(N))
1583       continue;
1584
1585     WorklistRemover DeadNodes(*this);
1586
1587     // If this combine is running after legalizing the DAG, re-legalize any
1588     // nodes pulled off the worklist.
1589     if (Level == AfterLegalizeDAG) {
1590       SmallSetVector<SDNode *, 16> UpdatedNodes;
1591       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1592
           // Every node reported in UpdatedNodes is revisited together with
           // its users.
1593       for (SDNode *LN : UpdatedNodes) {
1594         AddToWorklist(LN);
1595         AddUsersToWorklist(LN);
1596       }
           // LegalizeOp reported N as no longer valid, so there is nothing left
           // to combine for it.
1597       if (!NIsValid)
1598         continue;
1599     }
1600
1601     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1602
1603     // Add any operands of the new node which have not yet been combined to the
1604     // worklist as well. Because the worklist uniques things already, this
1605     // won't repeatedly process the same operand.
1606     CombinedNodes.insert(N);
1607     for (const SDValue &ChildN : N->op_values())
1608       if (!CombinedNodes.count(ChildN.getNode()))
1609         AddToWorklist(ChildN.getNode());
1610
1611     SDValue RV = combine(N);
1612
         // A null result means that no combine applied to N.
1613     if (!RV.getNode())
1614       continue;
1615
1616     ++NodesCombined;
1617
1618     // If we get back the same node we passed in, rather than a new node or
1619     // zero, we know that the node must have defined multiple values and
1620     // CombineTo was used.  Since CombineTo takes care of the worklist
1621     // mechanics for us, we have no work to do in this case.
1622     if (RV.getNode() == N)
1623       continue;
1624
1625     assert(N->getOpcode() != ISD::DELETED_NODE &&
1626            RV.getOpcode() != ISD::DELETED_NODE &&
1627            "Node was deleted but visit returned new node!");
1628
1629     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1630
         // With matching value counts the whole node is replaced; otherwise N
         // must produce exactly one value, of RV's type, and that value is
         // replaced by RV.
1631     if (N->getNumValues() == RV.getNode()->getNumValues())
1632       DAG.ReplaceAllUsesWith(N, RV.getNode());
1633     else {
1634       assert(N->getValueType(0) == RV.getValueType() &&
1635              N->getNumValues() == 1 && "Type mismatch");
1636       DAG.ReplaceAllUsesWith(N, &RV);
1637     }
1638
1639     // Push the new node and any users onto the worklist
1640     AddToWorklist(RV.getNode());
1641     AddUsersToWorklist(RV.getNode());
1642
1643     // Finally, if the node is now dead, remove it from the graph.  The node
1644     // may not be dead if the replacement process recursively simplified to
1645     // something else needing this node. This will also take care of adding any
1646     // operands which have lost a user to the worklist.
1647     recursivelyDeleteUnusedNodes(N);
1648   }
1649
1650   // If the root changed (e.g. it was a dead load), update the root.
1651   DAG.setRoot(Dummy.getValue());
1652   DAG.RemoveDeadNodes();
1653 }
1654
/// Dispatch \p N to the visit routine for its opcode and return that routine's
/// result. Opcodes that have no case here fall out of the switch and yield a
/// null SDValue.
1655 SDValue DAGCombiner::visit(SDNode *N) {
1656   switch (N->getOpcode()) {
1657   default: break;
1658   case ISD::TokenFactor:        return visitTokenFactor(N);
1659   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1660   case ISD::ADD:                return visitADD(N);
1661   case ISD::SUB:                return visitSUB(N);
1662   case ISD::SADDSAT:
1663   case ISD::UADDSAT:            return visitADDSAT(N);
1664   case ISD::SSUBSAT:
1665   case ISD::USUBSAT:            return visitSUBSAT(N);
1666   case ISD::ADDC:               return visitADDC(N);
1667   case ISD::SADDO:
1668   case ISD::UADDO:              return visitADDO(N);
1669   case ISD::SUBC:               return visitSUBC(N);
1670   case ISD::SSUBO:
1671   case ISD::USUBO:              return visitSUBO(N);
1672   case ISD::ADDE:               return visitADDE(N);
1673   case ISD::ADDCARRY:           return visitADDCARRY(N);
1674   case ISD::SUBE:               return visitSUBE(N);
1675   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1676   case ISD::MUL:                return visitMUL(N);
1677   case ISD::SDIV:               return visitSDIV(N);
1678   case ISD::UDIV:               return visitUDIV(N);
1679   case ISD::SREM:
1680   case ISD::UREM:               return visitREM(N);
1681   case ISD::MULHU:              return visitMULHU(N);
1682   case ISD::MULHS:              return visitMULHS(N);
1683   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1684   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1685   case ISD::SMULO:
1686   case ISD::UMULO:              return visitMULO(N);
1687   case ISD::SMIN:
1688   case ISD::SMAX:
1689   case ISD::UMIN:
1690   case ISD::UMAX:               return visitIMINMAX(N);
1691   case ISD::AND:                return visitAND(N);
1692   case ISD::OR:                 return visitOR(N);
1693   case ISD::XOR:                return visitXOR(N);
1694   case ISD::SHL:                return visitSHL(N);
1695   case ISD::SRA:                return visitSRA(N);
1696   case ISD::SRL:                return visitSRL(N);
1697   case ISD::ROTR:
1698   case ISD::ROTL:               return visitRotate(N);
1699   case ISD::FSHL:
1700   case ISD::FSHR:               return visitFunnelShift(N);
1701   case ISD::ABS:                return visitABS(N);
1702   case ISD::BSWAP:              return visitBSWAP(N);
1703   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1704   case ISD::CTLZ:               return visitCTLZ(N);
1705   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1706   case ISD::CTTZ:               return visitCTTZ(N);
1707   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1708   case ISD::CTPOP:              return visitCTPOP(N);
1709   case ISD::SELECT:             return visitSELECT(N);
1710   case ISD::VSELECT:            return visitVSELECT(N);
1711   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1712   case ISD::SETCC:              return visitSETCC(N);
1713   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1714   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1715   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1716   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1717   case ISD::AssertSext:
1718   case ISD::AssertZext:         return visitAssertExt(N);
1719   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1720   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1721   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1722   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1723   case ISD::BITCAST:            return visitBITCAST(N);
1724   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1725   case ISD::FADD:               return visitFADD(N);
1726   case ISD::FSUB:               return visitFSUB(N);
1727   case ISD::FMUL:               return visitFMUL(N);
1728   case ISD::FMA:                return visitFMA(N);
1729   case ISD::FDIV:               return visitFDIV(N);
1730   case ISD::FREM:               return visitFREM(N);
1731   case ISD::FSQRT:              return visitFSQRT(N);
1732   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1733   case ISD::FPOW:               return visitFPOW(N);
1734   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1735   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1736   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1737   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1738   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1739   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1740   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1741   case ISD::FNEG:               return visitFNEG(N);
1742   case ISD::FABS:               return visitFABS(N);
1743   case ISD::FFLOOR:             return visitFFLOOR(N);
1744   case ISD::FMINNUM:            return visitFMINNUM(N);
1745   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1746   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1747   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1748   case ISD::FCEIL:              return visitFCEIL(N);
1749   case ISD::FTRUNC:             return visitFTRUNC(N);
1750   case ISD::BRCOND:             return visitBRCOND(N);
1751   case ISD::BR_CC:              return visitBR_CC(N);
1752   case ISD::LOAD:               return visitLOAD(N);
1753   case ISD::STORE:              return visitSTORE(N);
1754   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1755   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1756   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1757   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1758   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1759   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1760   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1761   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1762   case ISD::MGATHER:            return visitMGATHER(N);
1763   case ISD::MLOAD:              return visitMLOAD(N);
1764   case ISD::MSCATTER:           return visitMSCATTER(N);
1765   case ISD::MSTORE:             return visitMSTORE(N);
1766   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1767   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1768   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
       // All vector reductions share a single visit routine.
1769   case ISD::VECREDUCE_FADD:
1770   case ISD::VECREDUCE_FMUL:
1771   case ISD::VECREDUCE_ADD:
1772   case ISD::VECREDUCE_MUL:
1773   case ISD::VECREDUCE_AND:
1774   case ISD::VECREDUCE_OR:
1775   case ISD::VECREDUCE_XOR:
1776   case ISD::VECREDUCE_SMAX:
1777   case ISD::VECREDUCE_SMIN:
1778   case ISD::VECREDUCE_UMAX:
1779   case ISD::VECREDUCE_UMIN:
1780   case ISD::VECREDUCE_FMAX:
1781   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1782   }
1783   return SDValue();
1784 }
1785
/// Combine a single node. The stages below are tried in order, each one only
/// if the earlier ones produced nothing: the generic visit routine, the
/// target's PerformDAGCombine hook, type promotion, and finally reuse of an
/// already existing node that is the commuted form of a commutative binary op.
///
/// \returns the replacement value, \p N itself when the node was already
/// handled in place (e.g. the LOAD promotion case below), or a null SDValue if
/// nothing changed.
1786 SDValue DAGCombiner::combine(SDNode *N) {
1787   SDValue RV = visit(N);
1788
1789   // If nothing happened, try a target-specific DAG combine.
1790   if (!RV.getNode()) {
1791     assert(N->getOpcode() != ISD::DELETED_NODE &&
1792            "Node was deleted but visit returned NULL!");
1793
         // Opcodes at or above BUILTIN_OP_END always go to the target; the
         // remaining opcodes only when hasTargetDAGCombine says so.
1794     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1795         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1796
1797       // Expose the DAG combiner to the target combiner impls.
1798       TargetLowering::DAGCombinerInfo
1799         DagCombineInfo(DAG, Level, false, this);
1800
1801       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1802     }
1803   }
1804
1805   // If nothing happened still, try promoting the operation.
1806   if (!RV.getNode()) {
1807     switch (N->getOpcode()) {
1808     default: break;
1809     case ISD::ADD:
1810     case ISD::SUB:
1811     case ISD::MUL:
1812     case ISD::AND:
1813     case ISD::OR:
1814     case ISD::XOR:
1815       RV = PromoteIntBinOp(SDValue(N, 0));
1816       break;
1817     case ISD::SHL:
1818     case ISD::SRA:
1819     case ISD::SRL:
1820       RV = PromoteIntShiftOp(SDValue(N, 0));
1821       break;
1822     case ISD::SIGN_EXTEND:
1823     case ISD::ZERO_EXTEND:
1824     case ISD::ANY_EXTEND:
1825       RV = PromoteExtend(SDValue(N, 0));
1826       break;
1827     case ISD::LOAD:
           // PromoteLoad only reports success; on success N itself is returned
           // as the result.
1828       if (PromoteLoad(SDValue(N, 0)))
1829         RV = SDValue(N, 0);
1830       break;
1831     }
1832   }
1833
1834   // If N is a commutative binary node, try to eliminate it if the commuted
1835   // version is already present in the DAG.
1836   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1837       N->getNumValues() == 1) {
1838     SDValue N0 = N->getOperand(0);
1839     SDValue N1 = N->getOperand(1);
1840
1841     // Constant operands are canonicalized to RHS.
         // The lookup is therefore skipped when only N1 is a constant (the
         // commuted form would have the constant on the LHS), and also when
         // both operands are the same value.
1842     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1843       SDValue Ops[] = {N1, N0};
1844       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1845                                             N->getFlags());
1846       if (CSENode)
1847         return SDValue(CSENode, 0);
1848     }
1849   }
1850
1851   return RV;
1852 }
1853
1854 /// Given a node, return its input chain if it has one, otherwise return a null
1855 /// sd operand.
1856 static SDValue getInputChainForNode(SDNode *N) {
1857   if (unsigned NumOps = N->getNumOperands()) {
1858     if (N->getOperand(0).getValueType() == MVT::Other)
1859       return N->getOperand(0);
1860     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1861       return N->getOperand(NumOps-1);
1862     for (unsigned i = 1; i < NumOps-1; ++i)
1863       if (N->getOperand(i).getValueType() == MVT::Other)
1864         return N->getOperand(i);
1865   }
1866   return SDValue();
1867 }
1868
/// Simplify a TokenFactor node: drop an operand that is already the input
/// chain of the other one (two-operand case), inline single-use TokenFactor
/// operands, remove EntryToken and duplicate operands, and prune operands that
/// are reached while walking up the chain of another operand.
///
/// \returns the replacement chain value, or a null SDValue if nothing changed.
1869 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1870   // If N has two operands, where one has an input chain equal to the other,
1871   // the 'other' chain is redundant.
1872   if (N->getNumOperands() == 2) {
1873     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1874       return N->getOperand(0);
1875     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1876       return N->getOperand(1);
1877   }
1878
1879   // Don't simplify token factors if optnone.
1880   if (OptLevel == CodeGenOpt::None)
1881     return SDValue();
1882
1883   // If the sole user is a token factor, we should make sure we have a
1884   // chance to merge them together. This prevents TF chains from inhibiting
1885   // optimizations.
1886   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1887     AddToWorklist(*(N->use_begin()));
1888
1889   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1890   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1891   SmallPtrSet<SDNode*, 16> SeenOps;
1892   bool Changed = false;             // If we should replace this token factor.
1893
1894   // Start out with this token factor.
1895   TFs.push_back(N);
1896
1897   // Iterate through token factors.  The TFs grows when new token factors are
1898   // encountered.
1899   for (unsigned i = 0; i < TFs.size(); ++i) {
1900     // Limit number of nodes to inline, to avoid quadratic compile times.
1901     // We have to add the outstanding Token Factors to Ops, otherwise we might
1902     // drop Ops from the resulting Token Factors.
1903     if (Ops.size() > TokenFactorInlineLimit) {
1904       for (unsigned j = i; j < TFs.size(); j++)
1905         Ops.emplace_back(TFs[j], 0);
1906       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1907       // combiner worklist later.
1908       TFs.resize(i);
1909       break;
1910     }
1911
1912     SDNode *TF = TFs[i];
1913     // Check each of the operands.
1914     for (const SDValue &Op : TF->op_values()) {
1915       switch (Op.getOpcode()) {
1916       case ISD::EntryToken:
1917         // Entry tokens don't need to be added to the list. They are
1918         // redundant.
1919         Changed = true;
1920         break;
1921
1922       case ISD::TokenFactor:
             // Only a single-use TokenFactor that is not queued yet is inlined;
             // any other TokenFactor is kept as an ordinary operand.
1923         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1924           // Queue up for processing.
1925           TFs.push_back(Op.getNode());
1926           Changed = true;
1927           break;
1928         }
1929         LLVM_FALLTHROUGH;
1930
1931       default:
1932         // Only add if it isn't already in the list.
1933         if (SeenOps.insert(Op.getNode()).second)
1934           Ops.push_back(Op);
1935         else
1936           Changed = true;
1937         break;
1938       }
1939     }
1940   }
1941
1942   // Re-visit inlined Token Factors, to clean them up in case they have been
1943   // removed. Skip the first Token Factor, as this is the current node.
1944   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1945     AddToWorklist(TFs[i]);
1946
1947   // Remove Nodes that are chained to another node in the list. Do so
1948   // by walking up chains breadth-first stopping when we've seen
1949   // another operand. In general we must climb to the EntryNode, but we can exit
1950   // early if we find all remaining work is associated with just one operand as
1951   // no further pruning is possible.
1952
1953   // List of nodes to search through and original Ops from which they originate.
1954   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1955   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1956   SmallPtrSet<SDNode *, 16> SeenChains;
1957   bool DidPruneOps = false;
1958
1959   unsigned NumLeftToConsider = 0;
1960   for (const SDValue &Op : Ops) {
1961     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1962     OpWorkCount.push_back(1);
1963   }
1964
       // NOTE: this local lambda has the same name as the AddToWorklist used
       // above with a single argument; from here to the end of the function the
       // name refers to the lambda, which operates on the local Worklist.
1965   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1966     // If this is an Op, we can remove the op from the list. Re-mark any
1967     // search associated with it as from the current OpNumber.
1968     if (SeenOps.count(Op) != 0) {
1969       Changed = true;
1970       DidPruneOps = true;
1971       unsigned OrigOpNumber = 0;
1972       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1973         OrigOpNumber++;
1974       assert((OrigOpNumber != Ops.size()) &&
1975              "expected to find TokenFactor Operand");
1976       // Re-mark worklist from OrigOpNumber to OpNumber
1977       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1978         if (Worklist[i].second == OrigOpNumber) {
1979           Worklist[i].second = OpNumber;
1980         }
1981       }
1982       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1983       OpWorkCount[OrigOpNumber] = 0;
1984       NumLeftToConsider--;
1985     }
1986     // Add if it's a new chain
1987     if (SeenChains.insert(Op).second) {
1988       OpWorkCount[OpNumber]++;
1989       Worklist.push_back(std::make_pair(Op, OpNumber));
1990     }
1991   };
1992
       // The walk stops after at most 1024 worklist entries.
1993   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1994     // We need to be considering at least 2 Ops to prune.
1995     if (NumLeftToConsider <= 1)
1996       break;
1997     auto CurNode = Worklist[i].first;
1998     auto CurOpNumber = Worklist[i].second;
1999     assert((OpWorkCount[CurOpNumber] > 0) &&
2000            "Node should not appear in worklist");
2001     switch (CurNode->getOpcode()) {
2002     case ISD::EntryToken:
2003       // Hitting EntryToken is the only way for the search to terminate without
2004       // hitting
2005       // another operand's search. Prevent us from marking this operand
2006       // considered.
2007       NumLeftToConsider++;
2008       break;
2009     case ISD::TokenFactor:
2010       for (const SDValue &Op : CurNode->op_values())
2011         AddToWorklist(i, Op.getNode(), CurOpNumber);
2012       break;
2013     case ISD::LIFETIME_START:
2014     case ISD::LIFETIME_END:
2015     case ISD::CopyFromReg:
2016     case ISD::CopyToReg:
           // For these opcodes the walk continues through operand 0.
2017       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2018       break;
2019     default:
           // Memory nodes continue through their chain; the walk stops at any
           // other kind of node.
2020       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2021         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2022       break;
2023     }
         // This entry is done; once an operand has no outstanding work left it
         // no longer counts as under consideration.
2024     OpWorkCount[CurOpNumber]--;
2025     if (OpWorkCount[CurOpNumber] == 0)
2026       NumLeftToConsider--;
2027   }
2028
2029   // If we've changed things around then replace token factor.
2030   if (Changed) {
2031     SDValue Result;
2032     if (Ops.empty()) {
2033       // The entry token is the only possible outcome.
2034       Result = DAG.getEntryNode();
2035     } else {
2036       if (DidPruneOps) {
2037         SmallVector<SDValue, 8> PrunedOps;
2038         // Keep only the operands that were not reached during the chain walk.
2039         for (const SDValue &Op : Ops) {
2040           if (SeenChains.count(Op.getNode()) == 0)
2041             PrunedOps.push_back(Op);
2042         }
2043         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2044       } else {
2045         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2046       }
2047     }
2048     return Result;
2049   }
2050   return SDValue();
2051 }
2052
2053 /// MERGE_VALUES can always be eliminated.
///
/// The uses of \p N are replaced using N's own operand list, after which N is
/// deleted. \p N itself is returned as the result.
2054 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2055   WorklistRemover DeadNodes(*this);
2056   // Replacing results may cause a different MERGE_VALUES to suddenly
2057   // be CSE'd with N, and carry its uses with it. Iterate until no
2058   // uses remain, to ensure that the node can be safely deleted.
2059   // First add the users of this node to the work list so that they
2060   // can be tried again once they have new operands.
2061   AddUsersToWorklist(N);
2062   do {
2063     // Do as a single replacement to avoid rewalking use lists.
2064     SmallVector<SDValue, 8> Ops;
2065     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2066       Ops.push_back(N->getOperand(i));
2067     DAG.ReplaceAllUsesWith(N, Ops.data());
2068   } while (!N->use_empty());
2069   deleteAndRecombine(N);
2070   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2071 }
2072
2073 /// Return \p N as a ConstantSDNode pointer when it is a constant node that is
2074 /// not opaque; for an opaque constant or any other value return nullptr.
2075 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2076   auto *C = dyn_cast<ConstantSDNode>(N);
2077   return (!C || C->isOpaque()) ? nullptr : C;
2078 }
2079
/// Fold the binary operator \p BO into a single-use select operand whose two
/// arms are constants:
///   binop (select Cond, CT, CF), CBO
///     --> select Cond, (binop CT, CBO), (binop CF, CBO)
/// When the select is operand 1 of \p BO, the operands of the new binops are
/// swapped accordingly.
///
/// \returns the new select, or a null SDValue if the pattern does not apply.
2080 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2081   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2082          "Unexpected binary operator");
2083
2084   // Don't do this unless the old select is going away. We want to eliminate the
2085   // binary operator, not replace a binop with a select.
2086   // TODO: Handle ISD::SELECT_CC.
2087   unsigned SelOpNo = 0;
2088   SDValue Sel = BO->getOperand(0);
       // Operand 0 is tried first; operand 1 is the fallback.
2089   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2090     SelOpNo = 1;
2091     Sel = BO->getOperand(1);
2092   }
2093
2094   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2095     return SDValue();
2096
       // Both arms of the select must be integer or FP constants (scalar or
       // vector).
2097   SDValue CT = Sel.getOperand(1);
2098   if (!isConstantOrConstantVector(CT, true) &&
2099       !isConstantFPBuildVectorOrConstantFP(CT))
2100     return SDValue();
2101
2102   SDValue CF = Sel.getOperand(2);
2103   if (!isConstantOrConstantVector(CF, true) &&
2104       !isConstantFPBuildVectorOrConstantFP(CF))
2105     return SDValue();
2106
2107   // Bail out if any constants are opaque because we can't constant fold those.
2108   // The exception is "and" and "or" with either 0 or -1 in which case we can
2109   // propagate non constant operands into select. I.e.:
2110   // and (select Cond, 0, -1), X --> select Cond, 0, X
2111   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2112   auto BinOpcode = BO->getOpcode();
2113   bool CanFoldNonConst =
2114       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2115       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2116       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2117
       // CBO is the operand of the binop that is not the select.
2118   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2119   if (!CanFoldNonConst &&
2120       !isConstantOrConstantVector(CBO, true) &&
2121       !isConstantFPBuildVectorOrConstantFP(CBO))
2122     return SDValue();
2123
2124   EVT VT = Sel.getValueType();
2125
2126   // In case of shift value and shift amount may have different VT. For instance
2127   // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
2128   // swapped operands and value types do not match. NB: x86 is fine if operands
2129   // are not swapped with shift amount VT being not bigger than shifted value.
2130   // TODO: that is possible to check for a shift operation, correct VTs and
2131   // still perform optimization on x86 if needed.
2132   if (SelOpNo && VT != CBO.getValueType())
2133     return SDValue();
2134
2135   // We have a select-of-constants followed by a binary operator with a
2136   // constant. Eliminate the binop by pulling the constant math into the select.
2137   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2138   SDLoc DL(Sel);
2139   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2140                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
       // Unless the non-constant fold applies, the new arm has to be undef or a
       // constant; otherwise give up.
2141   if (!CanFoldNonConst && !NewCT.isUndef() &&
2142       !isConstantOrConstantVector(NewCT, true) &&
2143       !isConstantFPBuildVectorOrConstantFP(NewCT))
2144     return SDValue();
2145
2146   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2147                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2148   if (!CanFoldNonConst && !NewCF.isUndef() &&
2149       !isConstantOrConstantVector(NewCF, true) &&
2150       !isConstantFPBuildVectorOrConstantFP(NewCF))
2151     return SDValue();
2152
       // The new select reuses the original condition and takes over the
       // binop's flags.
2153   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2154   SelectOp->setFlags(BO->getFlags());
2155   return SelectOp;
2156 }
2157
2158 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2159   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2160          "Expecting add or sub");
2161
2162   // Match a constant operand and a zext operand for the math instruction:
2163   // add Z, C
2164   // sub C, Z
2165   bool IsAdd = N->getOpcode() == ISD::ADD;
2166   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2167   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2168   auto *CN = dyn_cast<ConstantSDNode>(C);
2169   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2170     return SDValue();
2171
2172   // Match the zext operand as a setcc of a boolean.
2173   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2174       Z.getOperand(0).getValueType() != MVT::i1)
2175     return SDValue();
2176
2177   // Match the compare as: setcc (X & 1), 0, eq.
2178   SDValue SetCC = Z.getOperand(0);
2179   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2180   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2181       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2182       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2183     return SDValue();
2184
2185   // We are adding/subtracting a constant and an inverted low bit. Turn that
2186   // into a subtract/add of the low bit with incremented/decremented constant:
2187   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2188   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2189   EVT VT = C.getValueType();
2190   SDLoc DL(N);
2191   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2192   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2193                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2194   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2195 }
2196
2197 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2198 /// a shift and add with a different constant.
2199 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2200   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2201          "Expecting add or sub");
2202
2203   // We need a constant operand for the add/sub, and the other operand is a
2204   // logical shift right: add (srl), C or sub C, (srl).
2205   // TODO - support non-uniform vector amounts.
2206   bool IsAdd = N->getOpcode() == ISD::ADD;
2207   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2208   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2209   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2210   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2211     return SDValue();
2212
2213   // The shift must be of a 'not' value.
2214   SDValue Not = ShiftOp.getOperand(0);
2215   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2216     return SDValue();
2217
2218   // The shift must be moving the sign bit to the least-significant-bit.
2219   EVT VT = ShiftOp.getValueType();
2220   SDValue ShAmt = ShiftOp.getOperand(1);
2221   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2222   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2223     return SDValue();
2224
2225   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2226   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2227   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2228   SDLoc DL(N);
2229   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2230   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2231   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2232   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2233 }
2234
2235 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2236 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2237 /// are no common bits set in the operands).
2238 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2239   SDValue N0 = N->getOperand(0);
2240   SDValue N1 = N->getOperand(1);
2241   EVT VT = N0.getValueType();
2242   SDLoc DL(N);
2243
2244   // fold vector ops
2245   if (VT.isVector()) {
2246     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2247       return FoldedVOp;
2248
2249     // fold (add x, 0) -> x, vector edition
2250     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2251       return N0;
2252     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2253       return N1;
2254   }
2255
2256   // fold (add x, undef) -> undef
2257   if (N0.isUndef())
2258     return N0;
2259
2260   if (N1.isUndef())
2261     return N1;
2262
2263   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2264     // canonicalize constant to RHS
2265     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2266       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2267     // fold (add c1, c2) -> c1+c2
2268     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2269                                       N1.getNode());
2270   }
2271
2272   // fold (add x, 0) -> x
2273   if (isNullConstant(N1))
2274     return N0;
2275
2276   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2277     // fold ((A-c1)+c2) -> (A+(c2-c1))
2278     if (N0.getOpcode() == ISD::SUB &&
2279         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2280       SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2281                                                N0.getOperand(1).getNode());
2282       assert(Sub && "Constant folding failed");
2283       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2284     }
2285
2286     // fold ((c1-A)+c2) -> (c1+c2)-A
2287     if (N0.getOpcode() == ISD::SUB &&
2288         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2289       SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2290                                                N0.getOperand(0).getNode());
2291       assert(Add && "Constant folding failed");
2292       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2293     }
2294
2295     // add (sext i1 X), 1 -> zext (not i1 X)
2296     // We don't transform this pattern:
2297     //   add (zext i1 X), -1 -> sext (not i1 X)
2298     // because most (?) targets generate better code for the zext form.
2299     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2300         isOneOrOneSplat(N1)) {
2301       SDValue X = N0.getOperand(0);
2302       if ((!LegalOperations ||
2303            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2304             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2305           X.getScalarValueSizeInBits() == 1) {
2306         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2307         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2308       }
2309     }
2310
2311     // Undo the add -> or combine to merge constant offsets from a frame index.
2312     if (N0.getOpcode() == ISD::OR &&
2313         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2314         isa<ConstantSDNode>(N0.getOperand(1)) &&
2315         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2316       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2317       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2318     }
2319   }
2320
2321   if (SDValue NewSel = foldBinOpIntoSelect(N))
2322     return NewSel;
2323
2324   // reassociate add
2325   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2326     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2327       return RADD;
2328   }
2329   // fold ((0-A) + B) -> B-A
2330   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2331     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2332
2333   // fold (A + (0-B)) -> A-B
2334   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2335     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2336
2337   // fold (A+(B-A)) -> B
2338   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2339     return N1.getOperand(0);
2340
2341   // fold ((B-A)+A) -> B
2342   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2343     return N0.getOperand(0);
2344
2345   // fold ((A-B)+(C-A)) -> (C-B)
2346   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2347       N0.getOperand(0) == N1.getOperand(1))
2348     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2349                        N0.getOperand(1));
2350
2351   // fold ((A-B)+(B-C)) -> (A-C)
2352   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2353       N0.getOperand(1) == N1.getOperand(0))
2354     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2355                        N1.getOperand(1));
2356
2357   // fold (A+(B-(A+C))) to (B-C)
2358   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2359       N0 == N1.getOperand(1).getOperand(0))
2360     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2361                        N1.getOperand(1).getOperand(1));
2362
2363   // fold (A+(B-(C+A))) to (B-C)
2364   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2365       N0 == N1.getOperand(1).getOperand(1))
2366     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2367                        N1.getOperand(1).getOperand(0));
2368
2369   // fold (A+((B-A)+or-C)) to (B+or-C)
2370   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2371       N1.getOperand(0).getOpcode() == ISD::SUB &&
2372       N0 == N1.getOperand(0).getOperand(1))
2373     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2374                        N1.getOperand(1));
2375
2376   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2377   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2378     SDValue N00 = N0.getOperand(0);
2379     SDValue N01 = N0.getOperand(1);
2380     SDValue N10 = N1.getOperand(0);
2381     SDValue N11 = N1.getOperand(1);
2382
2383     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2384       return DAG.getNode(ISD::SUB, DL, VT,
2385                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2386                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2387   }
2388
2389   // fold (add (umax X, C), -C) --> (usubsat X, C)
2390   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2391     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2392       return (!Max && !Op) ||
2393              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2394     };
2395     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2396                                   /*AllowUndefs*/ true))
2397       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2398                          N0.getOperand(1));
2399   }
2400
2401   if (SimplifyDemandedBits(SDValue(N, 0)))
2402     return SDValue(N, 0);
2403
2404   if (isOneOrOneSplat(N1)) {
2405     // fold (add (xor a, -1), 1) -> (sub 0, a)
2406     if (isBitwiseNot(N0))
2407       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2408                          N0.getOperand(0));
2409
2410     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2411     if (N0.getOpcode() == ISD::ADD ||
2412         N0.getOpcode() == ISD::UADDO ||
2413         N0.getOpcode() == ISD::SADDO) {
2414       SDValue A, Xor;
2415
2416       if (isBitwiseNot(N0.getOperand(0))) {
2417         A = N0.getOperand(1);
2418         Xor = N0.getOperand(0);
2419       } else if (isBitwiseNot(N0.getOperand(1))) {
2420         A = N0.getOperand(0);
2421         Xor = N0.getOperand(1);
2422       }
2423
2424       if (Xor)
2425         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2426     }
2427
2428     // Look for:
2429     //   add (add x, y), 1
2430     // And if the target does not like this form then turn into:
2431     //   sub y, (xor x, -1)
2432     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2433         N0.getOpcode() == ISD::ADD) {
2434       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2435                                 DAG.getAllOnesConstant(DL, VT));
2436       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2437     }
2438   }
2439
2440   // (x - y) + -1  ->  add (xor y, -1), x
2441   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2442       isAllOnesOrAllOnesSplat(N1)) {
2443     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2444     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2445   }
2446
2447   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2448     return Combined;
2449
2450   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2451     return Combined;
2452
2453   return SDValue();
2454 }
2455
2456 SDValue DAGCombiner::visitADD(SDNode *N) {
2457   SDValue N0 = N->getOperand(0);
2458   SDValue N1 = N->getOperand(1);
2459   EVT VT = N0.getValueType();
2460   SDLoc DL(N);
2461
2462   if (SDValue Combined = visitADDLike(N))
2463     return Combined;
2464
2465   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2466     return V;
2467
2468   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2469     return V;
2470
2471   // fold (a+b) -> (a|b) iff a and b share no bits.
2472   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2473       DAG.haveNoCommonBitsSet(N0, N1))
2474     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2475
2476   return SDValue();
2477 }
2478
2479 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2480   unsigned Opcode = N->getOpcode();
2481   SDValue N0 = N->getOperand(0);
2482   SDValue N1 = N->getOperand(1);
2483   EVT VT = N0.getValueType();
2484   SDLoc DL(N);
2485
2486   // fold vector ops
2487   if (VT.isVector()) {
2488     // TODO SimplifyVBinOp
2489
2490     // fold (add_sat x, 0) -> x, vector edition
2491     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2492       return N0;
2493     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2494       return N1;
2495   }
2496
2497   // fold (add_sat x, undef) -> -1
2498   if (N0.isUndef() || N1.isUndef())
2499     return DAG.getAllOnesConstant(DL, VT);
2500
2501   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2502     // canonicalize constant to RHS
2503     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2504       return DAG.getNode(Opcode, DL, VT, N1, N0);
2505     // fold (add_sat c1, c2) -> c3
2506     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2507                                       N1.getNode());
2508   }
2509
2510   // fold (add_sat x, 0) -> x
2511   if (isNullConstant(N1))
2512     return N0;
2513
2514   // If it cannot overflow, transform into an add.
2515   if (Opcode == ISD::UADDSAT)
2516     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2517       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2518
2519   return SDValue();
2520 }
2521
2522 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2523   bool Masked = false;
2524
2525   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2526   while (true) {
2527     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2528       V = V.getOperand(0);
2529       continue;
2530     }
2531
2532     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2533       Masked = true;
2534       V = V.getOperand(0);
2535       continue;
2536     }
2537
2538     break;
2539   }
2540
2541   // If this is not a carry, return.
2542   if (V.getResNo() != 1)
2543     return SDValue();
2544
2545   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2546       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2547     return SDValue();
2548
2549   EVT VT = V.getNode()->getValueType(0);
2550   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2551     return SDValue();
2552
2553   // If the result is masked, then no matter what kind of bool it is we can
2554   // return. If it isn't, then we need to make sure the bool type is either 0 or
2555   // 1 and not other values.
2556   if (Masked ||
2557       TLI.getBooleanContents(V.getValueType()) ==
2558           TargetLoweringBase::ZeroOrOneBooleanContent)
2559     return V;
2560
2561   return SDValue();
2562 }
2563
2564 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2565 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2566 /// the opcode and bypass the mask operation.
2567 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2568                                  SelectionDAG &DAG, const SDLoc &DL) {
2569   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2570     return SDValue();
2571
2572   EVT VT = N0.getValueType();
2573   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2574     return SDValue();
2575
2576   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2577   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2578   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2579 }
2580
2581 /// Helper for doing combines based on N0 and N1 being added to each other.
2582 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2583                                           SDNode *LocReference) {
2584   EVT VT = N0.getValueType();
2585   SDLoc DL(LocReference);
2586
2587   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2588   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2589       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2590     return DAG.getNode(ISD::SUB, DL, VT, N0,
2591                        DAG.getNode(ISD::SHL, DL, VT,
2592                                    N1.getOperand(0).getOperand(1),
2593                                    N1.getOperand(1)));
2594
2595   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2596     return V;
2597
2598   // Look for:
2599   //   add (add x, 1), y
2600   // And if the target does not like this form then turn into:
2601   //   sub y, (xor x, -1)
2602   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2603       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2604     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2605                               DAG.getAllOnesConstant(DL, VT));
2606     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2607   }
2608
2609   // Hoist one-use subtraction by non-opaque constant:
2610   //   (x - C) + y  ->  (x + y) - C
2611   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2612   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2613       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2614     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2615     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2616   }
2617   // Hoist one-use subtraction from non-opaque constant:
2618   //   (C - x) + y  ->  (y - x) + C
2619   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2620       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2621     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2622     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2623   }
2624
2625   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2626   // rather than 'add 0/-1' (the zext should get folded).
2627   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2628   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2629       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2630       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2631     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2632     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2633   }
2634
2635   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2636   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2637     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2638     if (TN->getVT() == MVT::i1) {
2639       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2640                                  DAG.getConstant(1, DL, VT));
2641       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2642     }
2643   }
2644
2645   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2646   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2647       N1.getResNo() == 0)
2648     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2649                        N0, N1.getOperand(0), N1.getOperand(2));
2650
2651   // (add X, Carry) -> (addcarry X, 0, Carry)
2652   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2653     if (SDValue Carry = getAsCarry(TLI, N1))
2654       return DAG.getNode(ISD::ADDCARRY, DL,
2655                          DAG.getVTList(VT, Carry.getValueType()), N0,
2656                          DAG.getConstant(0, DL, VT), Carry);
2657
2658   return SDValue();
2659 }
2660
2661 SDValue DAGCombiner::visitADDC(SDNode *N) {
2662   SDValue N0 = N->getOperand(0);
2663   SDValue N1 = N->getOperand(1);
2664   EVT VT = N0.getValueType();
2665   SDLoc DL(N);
2666
2667   // If the flag result is dead, turn this into an ADD.
2668   if (!N->hasAnyUseOfValue(1))
2669     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2670                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2671
2672   // canonicalize constant to RHS.
2673   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2674   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2675   if (N0C && !N1C)
2676     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2677
2678   // fold (addc x, 0) -> x + no carry out
2679   if (isNullConstant(N1))
2680     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2681                                         DL, MVT::Glue));
2682
2683   // If it cannot overflow, transform into an add.
2684   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2685     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2686                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2687
2688   return SDValue();
2689 }
2690
2691 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2692                            SelectionDAG &DAG, const TargetLowering &TLI) {
2693   EVT VT = V.getValueType();
2694
2695   SDValue Cst;
2696   switch (TLI.getBooleanContents(VT)) {
2697   case TargetLowering::ZeroOrOneBooleanContent:
2698   case TargetLowering::UndefinedBooleanContent:
2699     Cst = DAG.getConstant(1, DL, VT);
2700     break;
2701   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2702     Cst = DAG.getAllOnesConstant(DL, VT);
2703     break;
2704   }
2705
2706   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2707 }
2708
2709 /**
2710  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2711  * then the flip also occurs if computing the inverse is the same cost.
2712  * This function returns an empty SDValue in case it cannot flip the boolean
2713  * without increasing the cost of the computation. If you want to flip a boolean
2714  * no matter what, use flipBoolean.
2715  */
2716 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2717                                   const TargetLowering &TLI,
2718                                   bool Force) {
2719   if (Force && isa<ConstantSDNode>(V))
2720     return flipBoolean(V, SDLoc(V), DAG, TLI);
2721
2722   if (V.getOpcode() != ISD::XOR)
2723     return SDValue();
2724
2725   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2726   if (!Const)
2727     return SDValue();
2728
2729   EVT VT = V.getValueType();
2730
2731   bool IsFlip = false;
2732   switch(TLI.getBooleanContents(VT)) {
2733     case TargetLowering::ZeroOrOneBooleanContent:
2734       IsFlip = Const->isOne();
2735       break;
2736     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2737       IsFlip = Const->isAllOnesValue();
2738       break;
2739     case TargetLowering::UndefinedBooleanContent:
2740       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2741       break;
2742   }
2743
2744   if (IsFlip)
2745     return V.getOperand(0);
2746   if (Force)
2747     return flipBoolean(V, SDLoc(V), DAG, TLI);
2748   return SDValue();
2749 }
2750
2751 SDValue DAGCombiner::visitADDO(SDNode *N) {
2752   SDValue N0 = N->getOperand(0);
2753   SDValue N1 = N->getOperand(1);
2754   EVT VT = N0.getValueType();
2755   bool IsSigned = (ISD::SADDO == N->getOpcode());
2756
2757   EVT CarryVT = N->getValueType(1);
2758   SDLoc DL(N);
2759
2760   // If the flag result is dead, turn this into an ADD.
2761   if (!N->hasAnyUseOfValue(1))
2762     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2763                      DAG.getUNDEF(CarryVT));
2764
2765   // canonicalize constant to RHS.
2766   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2767       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2768     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2769
2770   // fold (addo x, 0) -> x + no carry out
2771   if (isNullOrNullSplat(N1))
2772     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2773
2774   if (!IsSigned) {
2775     // If it cannot overflow, transform into an add.
2776     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2777       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2778                        DAG.getConstant(0, DL, CarryVT));
2779
2780     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2781     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2782       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2783                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2784       return CombineTo(N, Sub,
2785                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2786     }
2787
2788     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2789       return Combined;
2790
2791     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2792       return Combined;
2793   }
2794
2795   return SDValue();
2796 }
2797
2798 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2799   EVT VT = N0.getValueType();
2800   if (VT.isVector())
2801     return SDValue();
2802
2803   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2804   // If Y + 1 cannot overflow.
2805   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2806     SDValue Y = N1.getOperand(0);
2807     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2808     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2809       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2810                          N1.getOperand(2));
2811   }
2812
2813   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2814   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2815     if (SDValue Carry = getAsCarry(TLI, N1))
2816       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2817                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2818
2819   return SDValue();
2820 }
2821
2822 SDValue DAGCombiner::visitADDE(SDNode *N) {
2823   SDValue N0 = N->getOperand(0);
2824   SDValue N1 = N->getOperand(1);
2825   SDValue CarryIn = N->getOperand(2);
2826
2827   // canonicalize constant to RHS
2828   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2829   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2830   if (N0C && !N1C)
2831     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2832                        N1, N0, CarryIn);
2833
2834   // fold (adde x, y, false) -> (addc x, y)
2835   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2836     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2837
2838   return SDValue();
2839 }
2840
2841 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2842   SDValue N0 = N->getOperand(0);
2843   SDValue N1 = N->getOperand(1);
2844   SDValue CarryIn = N->getOperand(2);
2845   SDLoc DL(N);
2846
2847   // canonicalize constant to RHS
2848   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2849   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2850   if (N0C && !N1C)
2851     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2852
2853   // fold (addcarry x, y, false) -> (uaddo x, y)
2854   if (isNullConstant(CarryIn)) {
2855     if (!LegalOperations ||
2856         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2857       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2858   }
2859
2860   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2861   if (isNullConstant(N0) && isNullConstant(N1)) {
2862     EVT VT = N0.getValueType();
2863     EVT CarryVT = CarryIn.getValueType();
2864     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2865     AddToWorklist(CarryExt.getNode());
2866     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2867                                     DAG.getConstant(1, DL, VT)),
2868                      DAG.getConstant(0, DL, CarryVT));
2869   }
2870
2871   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2872     return Combined;
2873
2874   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2875     return Combined;
2876
2877   return SDValue();
2878 }
2879
2880 /**
2881  * If we are facing some sort of diamond carry propapagtion pattern try to
2882  * break it up to generate something like:
2883  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2884  *
2885  * The end result is usually an increase in operation required, but because the
2886  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2887  *
2888  * Patterns typically look something like
2889  *            (uaddo A, B)
2890  *             /       \
2891  *          Carry      Sum
2892  *            |          \
2893  *            | (addcarry *, 0, Z)
2894  *            |       /
2895  *             \   Carry
2896  *              |   /
2897  * (addcarry X, *, *)
2898  *
2899  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2900  * produce a combine with a single path for carry propagation.
2901  */
2902 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2903                                       SDValue X, SDValue Carry0, SDValue Carry1,
2904                                       SDNode *N) {
2905   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2906     return SDValue();
2907   if (Carry1.getOpcode() != ISD::UADDO)
2908     return SDValue();
2909
2910   SDValue Z;
2911
2912   /**
2913    * First look for a suitable Z. It will present itself in the form of
2914    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2915    */
2916   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2917       isNullConstant(Carry0.getOperand(1))) {
2918     Z = Carry0.getOperand(2);
2919   } else if (Carry0.getOpcode() == ISD::UADDO &&
2920              isOneConstant(Carry0.getOperand(1))) {
2921     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2922     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2923   } else {
2924     // We couldn't find a suitable Z.
2925     return SDValue();
2926   }
2927
2928
2929   auto cancelDiamond = [&](SDValue A,SDValue B) {
2930     SDLoc DL(N);
2931     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2932     Combiner.AddToWorklist(NewY.getNode());
2933     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2934                        DAG.getConstant(0, DL, X.getValueType()),
2935                        NewY.getValue(1));
2936   };
2937
2938   /**
2939    *      (uaddo A, B)
2940    *           |
2941    *          Sum
2942    *           |
2943    * (addcarry *, 0, Z)
2944    */
2945   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2946     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2947   }
2948
2949   /**
2950    * (addcarry A, 0, Z)
2951    *         |
2952    *        Sum
2953    *         |
2954    *  (uaddo *, B)
2955    */
2956   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2957     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2958   }
2959
2960   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2961     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2962   }
2963
2964   return SDValue();
2965 }
2966
2967 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2968                                        SDNode *N) {
2969   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2970   if (isBitwiseNot(N0))
2971     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2972       SDLoc DL(N);
2973       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2974                                 N0.getOperand(0), NotC);
2975       return CombineTo(N, Sub,
2976                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2977     }
2978
2979   // Iff the flag result is dead:
2980   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2981   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2982   // or the dependency between the instructions.
2983   if ((N0.getOpcode() == ISD::ADD ||
2984        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2985         N0.getValue(1) != CarryIn)) &&
2986       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2987     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2988                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2989
2990   /**
2991    * When one of the addcarry argument is itself a carry, we may be facing
2992    * a diamond carry propagation. In which case we try to transform the DAG
2993    * to ensure linear carry propagation if that is possible.
2994    */
2995   if (auto Y = getAsCarry(TLI, N1)) {
2996     // Because both are carries, Y and Z can be swapped.
2997     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2998       return R;
2999     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3000       return R;
3001   }
3002
3003   return SDValue();
3004 }
3005
3006 // Since it may not be valid to emit a fold to zero for vector initializers
3007 // check if we can before folding.
3008 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3009                              SelectionDAG &DAG, bool LegalOperations) {
3010   if (!VT.isVector())
3011     return DAG.getConstant(0, DL, VT);
3012   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3013     return DAG.getConstant(0, DL, VT);
3014   return SDValue();
3015 }
3016
3017 SDValue DAGCombiner::visitSUB(SDNode *N) {
3018   SDValue N0 = N->getOperand(0);
3019   SDValue N1 = N->getOperand(1);
3020   EVT VT = N0.getValueType();
3021   SDLoc DL(N);
3022
3023   // fold vector ops
3024   if (VT.isVector()) {
3025     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3026       return FoldedVOp;
3027
3028     // fold (sub x, 0) -> x, vector edition
3029     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3030       return N0;
3031   }
3032
3033   // fold (sub x, x) -> 0
3034   // FIXME: Refactor this and xor and other similar operations together.
3035   if (N0 == N1)
3036     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3037   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3038       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3039     // fold (sub c1, c2) -> c1-c2
3040     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3041                                       N1.getNode());
3042   }
3043
3044   if (SDValue NewSel = foldBinOpIntoSelect(N))
3045     return NewSel;
3046
3047   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3048
3049   // fold (sub x, c) -> (add x, -c)
3050   if (N1C) {
3051     return DAG.getNode(ISD::ADD, DL, VT, N0,
3052                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3053   }
3054
3055   if (isNullOrNullSplat(N0)) {
3056     unsigned BitWidth = VT.getScalarSizeInBits();
3057     // Right-shifting everything out but the sign bit followed by negation is
3058     // the same as flipping arithmetic/logical shift type without the negation:
3059     // -(X >>u 31) -> (X >>s 31)
3060     // -(X >>s 31) -> (X >>u 31)
3061     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3062       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3063       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3064         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3065         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3066           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3067       }
3068     }
3069
3070     // 0 - X --> 0 if the sub is NUW.
3071     if (N->getFlags().hasNoUnsignedWrap())
3072       return N0;
3073
3074     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3075       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3076       // N1 must be 0 because negating the minimum signed value is undefined.
3077       if (N->getFlags().hasNoSignedWrap())
3078         return N0;
3079
3080       // 0 - X --> X if X is 0 or the minimum signed value.
3081       return N1;
3082     }
3083   }
3084
3085   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3086   if (isAllOnesOrAllOnesSplat(N0))
3087     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3088
3089   // fold (A - (0-B)) -> A+B
3090   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3091     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3092
3093   // fold A-(A-B) -> B
3094   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3095     return N1.getOperand(1);
3096
3097   // fold (A+B)-A -> B
3098   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3099     return N0.getOperand(1);
3100
3101   // fold (A+B)-B -> A
3102   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3103     return N0.getOperand(0);
3104
3105   // fold (A+C1)-C2 -> A+(C1-C2)
3106   if (N0.getOpcode() == ISD::ADD &&
3107       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3108       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3109     SDValue NewC = DAG.FoldConstantArithmetic(
3110         ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3111     assert(NewC && "Constant folding failed");
3112     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3113   }
3114
3115   // fold C2-(A+C1) -> (C2-C1)-A
3116   if (N1.getOpcode() == ISD::ADD) {
3117     SDValue N11 = N1.getOperand(1);
3118     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3119         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3120       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3121                                                 N11.getNode());
3122       assert(NewC && "Constant folding failed");
3123       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3124     }
3125   }
3126
3127   // fold (A-C1)-C2 -> A-(C1+C2)
3128   if (N0.getOpcode() == ISD::SUB &&
3129       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3130       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3131     SDValue NewC = DAG.FoldConstantArithmetic(
3132         ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3133     assert(NewC && "Constant folding failed");
3134     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3135   }
3136
3137   // fold (c1-A)-c2 -> (c1-c2)-A
3138   if (N0.getOpcode() == ISD::SUB &&
3139       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3140       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3141     SDValue NewC = DAG.FoldConstantArithmetic(
3142         ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
3143     assert(NewC && "Constant folding failed");
3144     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3145   }
3146
3147   // fold ((A+(B+or-C))-B) -> A+or-C
3148   if (N0.getOpcode() == ISD::ADD &&
3149       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3150        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3151       N0.getOperand(1).getOperand(0) == N1)
3152     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3153                        N0.getOperand(1).getOperand(1));
3154
3155   // fold ((A+(C+B))-B) -> A+C
3156   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3157       N0.getOperand(1).getOperand(1) == N1)
3158     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3159                        N0.getOperand(1).getOperand(0));
3160
3161   // fold ((A-(B-C))-C) -> A-B
3162   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3163       N0.getOperand(1).getOperand(1) == N1)
3164     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3165                        N0.getOperand(1).getOperand(0));
3166
3167   // fold (A-(B-C)) -> A+(C-B)
3168   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3169     return DAG.getNode(ISD::ADD, DL, VT, N0,
3170                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3171                                    N1.getOperand(0)));
3172
3173   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3174   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3175     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3176         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3177       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3178                                 N1.getOperand(0).getOperand(1),
3179                                 N1.getOperand(1));
3180       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3181     }
3182     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3183         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3184       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3185                                 N1.getOperand(0),
3186                                 N1.getOperand(1).getOperand(1));
3187       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3188     }
3189   }
3190
3191   // If either operand of a sub is undef, the result is undef
3192   if (N0.isUndef())
3193     return N0;
3194   if (N1.isUndef())
3195     return N1;
3196
3197   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3198     return V;
3199
3200   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3201     return V;
3202
3203   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3204     return V;
3205
3206   // (x - y) - 1  ->  add (xor y, -1), x
3207   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3208     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3209                               DAG.getAllOnesConstant(DL, VT));
3210     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3211   }
3212
3213   // Look for:
3214   //   sub y, (xor x, -1)
3215   // And if the target does not like this form then turn into:
3216   //   add (add x, y), 1
3217   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3218     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3219     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3220   }
3221
3222   // Hoist one-use addition by non-opaque constant:
3223   //   (x + C) - y  ->  (x - y) + C
3224   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3225       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3226     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3227     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3228   }
3229   // y - (x + C)  ->  (y - x) - C
3230   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3231       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3232     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3233     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3234   }
3235   // (x - C) - y  ->  (x - y) - C
3236   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3237   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3238       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3239     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3240     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3241   }
3242   // (C - x) - y  ->  C - (x + y)
3243   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3244       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3245     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3246     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3247   }
3248
3249   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3250   // rather than 'sub 0/1' (the sext should get folded).
3251   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3252   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3253       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3254       TLI.getBooleanContents(VT) ==
3255           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3256     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3257     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3258   }
3259
3260   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3261   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3262     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3263       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3264       SDValue S0 = N1.getOperand(0);
3265       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3266         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3267         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3268           if (C->getAPIntValue() == (OpSizeInBits - 1))
3269             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3270       }
3271     }
3272   }
3273
3274   // If the relocation model supports it, consider symbol offsets.
3275   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3276     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3277       // fold (sub Sym, c) -> Sym-c
3278       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3279         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3280                                     GA->getOffset() -
3281                                         (uint64_t)N1C->getSExtValue());
3282       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3283       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3284         if (GA->getGlobal() == GB->getGlobal())
3285           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3286                                  DL, VT);
3287     }
3288
3289   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3290   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3291     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3292     if (TN->getVT() == MVT::i1) {
3293       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3294                                  DAG.getConstant(1, DL, VT));
3295       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3296     }
3297   }
3298
3299   // Prefer an add for more folding potential and possibly better codegen:
3300   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3301   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3302     SDValue ShAmt = N1.getOperand(1);
3303     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3304     if (ShAmtC &&
3305         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3306       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3307       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3308     }
3309   }
3310
3311   return SDValue();
3312 }
3313
3314 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3315   SDValue N0 = N->getOperand(0);
3316   SDValue N1 = N->getOperand(1);
3317   EVT VT = N0.getValueType();
3318   SDLoc DL(N);
3319
3320   // fold vector ops
3321   if (VT.isVector()) {
3322     // TODO SimplifyVBinOp
3323
3324     // fold (sub_sat x, 0) -> x, vector edition
3325     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3326       return N0;
3327   }
3328
3329   // fold (sub_sat x, undef) -> 0
3330   if (N0.isUndef() || N1.isUndef())
3331     return DAG.getConstant(0, DL, VT);
3332
3333   // fold (sub_sat x, x) -> 0
3334   if (N0 == N1)
3335     return DAG.getConstant(0, DL, VT);
3336
3337   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3338       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3339     // fold (sub_sat c1, c2) -> c3
3340     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3341                                       N1.getNode());
3342   }
3343
3344   // fold (sub_sat x, 0) -> x
3345   if (isNullConstant(N1))
3346     return N0;
3347
3348   return SDValue();
3349 }
3350
3351 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3352   SDValue N0 = N->getOperand(0);
3353   SDValue N1 = N->getOperand(1);
3354   EVT VT = N0.getValueType();
3355   SDLoc DL(N);
3356
3357   // If the flag result is dead, turn this into an SUB.
3358   if (!N->hasAnyUseOfValue(1))
3359     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3360                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3361
3362   // fold (subc x, x) -> 0 + no borrow
3363   if (N0 == N1)
3364     return CombineTo(N, DAG.getConstant(0, DL, VT),
3365                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3366
3367   // fold (subc x, 0) -> x + no borrow
3368   if (isNullConstant(N1))
3369     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3370
3371   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3372   if (isAllOnesConstant(N0))
3373     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3374                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3375
3376   return SDValue();
3377 }
3378
3379 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3380   SDValue N0 = N->getOperand(0);
3381   SDValue N1 = N->getOperand(1);
3382   EVT VT = N0.getValueType();
3383   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3384
3385   EVT CarryVT = N->getValueType(1);
3386   SDLoc DL(N);
3387
3388   // If the flag result is dead, turn this into an SUB.
3389   if (!N->hasAnyUseOfValue(1))
3390     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3391                      DAG.getUNDEF(CarryVT));
3392
3393   // fold (subo x, x) -> 0 + no borrow
3394   if (N0 == N1)
3395     return CombineTo(N, DAG.getConstant(0, DL, VT),
3396                      DAG.getConstant(0, DL, CarryVT));
3397
3398   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3399
3400   // fold (subox, c) -> (addo x, -c)
3401   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3402     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3403                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3404   }
3405
3406   // fold (subo x, 0) -> x + no borrow
3407   if (isNullOrNullSplat(N1))
3408     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3409
3410   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3411   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3412     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3413                      DAG.getConstant(0, DL, CarryVT));
3414
3415   return SDValue();
3416 }
3417
3418 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3419   SDValue N0 = N->getOperand(0);
3420   SDValue N1 = N->getOperand(1);
3421   SDValue CarryIn = N->getOperand(2);
3422
3423   // fold (sube x, y, false) -> (subc x, y)
3424   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3425     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3426
3427   return SDValue();
3428 }
3429
3430 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3431   SDValue N0 = N->getOperand(0);
3432   SDValue N1 = N->getOperand(1);
3433   SDValue CarryIn = N->getOperand(2);
3434
3435   // fold (subcarry x, y, false) -> (usubo x, y)
3436   if (isNullConstant(CarryIn)) {
3437     if (!LegalOperations ||
3438         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3439       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3440   }
3441
3442   return SDValue();
3443 }
3444
/// Combine an ISD::MUL node.
///
/// The folds are tried in the order written; the first one that applies
/// determines the result. Returns the replacement value, or an empty SDValue
/// when no fold applies.
3445 SDValue DAGCombiner::visitMUL(SDNode *N) {
3446   SDValue N0 = N->getOperand(0);
3447   SDValue N1 = N->getOperand(1);
3448   EVT VT = N0.getValueType();
3449
3450   // fold (mul x, undef) -> 0
3451   if (N0.isUndef() || N1.isUndef())
3452     return DAG.getConstant(0, SDLoc(N), VT);
3453
       // Constant-ness of each operand. For vectors only constant splats are
       // recognised and the opaque flags stay false; for scalars any
       // ConstantSDNode counts and its opaqueness is recorded separately.
3454   bool N0IsConst = false;
3455   bool N1IsConst = false;
3456   bool N1IsOpaqueConst = false;
3457   bool N0IsOpaqueConst = false;
3458   APInt ConstValue0, ConstValue1;
3459   // fold vector ops
3460   if (VT.isVector()) {
3461     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3462       return FoldedVOp;
3463
3464     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3465     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3466     assert((!N0IsConst ||
3467             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3468            "Splat APInt should be element width");
3469     assert((!N1IsConst ||
3470             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3471            "Splat APInt should be element width");
3472   } else {
3473     N0IsConst = isa<ConstantSDNode>(N0);
3474     if (N0IsConst) {
3475       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3476       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3477     }
3478     N1IsConst = isa<ConstantSDNode>(N1);
3479     if (N1IsConst) {
3480       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3481       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3482     }
3483   }
3484
3485   // fold (mul c1, c2) -> c1*c2
3486   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3487     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3488                                       N0.getNode(), N1.getNode());
3489
3490   // canonicalize constant to RHS (vector doesn't have to splat)
3491   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3492      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3493     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3494   // fold (mul x, 0) -> 0
3495   if (N1IsConst && ConstValue1.isNullValue())
3496     return N1;
3497   // fold (mul x, 1) -> x
3498   if (N1IsConst && ConstValue1.isOneValue())
3499     return N0;
3500
3501   if (SDValue NewSel = foldBinOpIntoSelect(N))
3502     return NewSel;
3503
3504   // fold (mul x, -1) -> 0-x
3505   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3506     SDLoc DL(N);
3507     return DAG.getNode(ISD::SUB, DL, VT,
3508                        DAG.getConstant(0, DL, VT), N0);
3509   }
3510   // fold (mul x, (1 << c)) -> x << c
       // For vector types this is only attempted while
       // Level <= AfterLegalizeVectorOps.
3511   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3512       DAG.isKnownToBeAPowerOfTwo(N1) &&
3513       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3514     SDLoc DL(N);
3515     SDValue LogBase2 = BuildLogBase2(N1, DL);
3516     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3517     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3518     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3519   }
3520   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3521   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3522     unsigned Log2Val = (-ConstValue1).logBase2();
3523     SDLoc DL(N);
3524     // FIXME: If the input is something that is easily negated (e.g. a
3525     // single-use add), we should put the negate there.
3526     return DAG.getNode(ISD::SUB, DL, VT,
3527                        DAG.getConstant(0, DL, VT),
3528                        DAG.getNode(ISD::SHL, DL, VT, N0,
3529                             DAG.getConstant(Log2Val, DL,
3530                                       getShiftAmountTy(N0.getValueType()))));
3531   }
3532
3533   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3534   // mul x, (2^N + 1) --> add (shl x, N), x
3535   // mul x, (2^N - 1) --> sub (shl x, N), x
3536   // Examples: x * 33 --> (x << 5) + x
3537   //           x * 15 --> (x << 4) - x
3538   //           x * -33 --> -((x << 5) + x)
3539   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3540   if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3541     // TODO: We could handle more general decomposition of any constant by
3542     //       having the target set a limit on number of ops and making a
3543     //       callback to determine that sequence (similar to sqrt expansion).
         // ISD::DELETED_NODE doubles as the "no decomposition found" sentinel.
3544     unsigned MathOp = ISD::DELETED_NODE;
         // Work on |C|; a negative constant is handled by negating the result.
3545     APInt MulC = ConstValue1.abs();
3546     if ((MulC - 1).isPowerOf2())
3547       MathOp = ISD::ADD;
3548     else if ((MulC + 1).isPowerOf2())
3549       MathOp = ISD::SUB;
3550
3551     if (MathOp != ISD::DELETED_NODE) {
3552       unsigned ShAmt =
3553           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3554       assert(ShAmt < VT.getScalarSizeInBits() &&
3555              "multiply-by-constant generated out of bounds shift");
3556       SDLoc DL(N);
3557       SDValue Shl =
3558           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3559       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3560       if (ConstValue1.isNegative())
3561         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3562       return R;
3563     }
3564   }
3565
3566   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3567   if (N0.getOpcode() == ISD::SHL &&
3568       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3569       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3570     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
         // Only commit if building the shift of the two constants produced a
         // constant again.
3571     if (isConstantOrConstantVector(C3))
3572       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3573   }
3574
3575   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3576   // use.
3577   {
         // Sh is the matched one-use shift (if any) and Y the other multiplicand;
         // both stay null when neither operand matches.
3578     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3579
3580     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3581     if (N0.getOpcode() == ISD::SHL &&
3582         isConstantOrConstantVector(N0.getOperand(1)) &&
3583         N0.getNode()->hasOneUse()) {
3584       Sh = N0; Y = N1;
3585     } else if (N1.getOpcode() == ISD::SHL &&
3586                isConstantOrConstantVector(N1.getOperand(1)) &&
3587                N1.getNode()->hasOneUse()) {
3588       Sh = N1; Y = N0;
3589     }
3590
3591     if (Sh.getNode()) {
3592       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3593       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3594     }
3595   }
3596
3597   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
       // Gated on isMulAddWithConstProfitable.
3598   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3599       N0.getOpcode() == ISD::ADD &&
3600       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3601       isMulAddWithConstProfitable(N, N0, N1))
3602       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3603                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3604                                      N0.getOperand(0), N1),
3605                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3606                                      N0.getOperand(1), N1));
3607
3608   // reassociate mul
3609   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3610     return RMUL;
3611
3612   return SDValue();
3613 }
3614
3615 /// Return true if divmod libcall is available.
3616 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3617                                      const TargetLowering &TLI) {
3618   RTLIB::Libcall LC;
3619   EVT NodeType = Node->getValueType(0);
3620   if (!NodeType.isSimple())
3621     return false;
3622   switch (NodeType.getSimpleVT().SimpleTy) {
3623   default: return false; // No libcall for vector types.
3624   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3625   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3626   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3627   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3628   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3629   }
3630
3631   return TLI.getLibcallName(LC) != nullptr;
3632 }
3633
3634 /// Issue divrem if both quotient and remainder are needed.
///
/// \p Node is the node being combined; the opcode checks below distinguish
/// SDIV/SREM (signed) and treat SDIV/UDIV as the division case, everything
/// else as the remainder case. The users of its first operand are scanned
/// for nodes that share both operands with \p Node; matching division and
/// remainder users are replaced by the two results of one DIVREM node.
///
/// \returns value 0 of the DIVREM node that was created or reused, or an
/// empty SDValue when the transform is not applicable or no partner node was
/// found.
3635 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3636   if (Node->use_empty())
3637     return SDValue(); // This is a dead node, leave it alone.
3638
3639   unsigned Opcode = Node->getOpcode();
3640   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3641   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3642
3643   // DivMod lib calls can still work on non-legal types if using lib-calls.
3644   EVT VT = Node->getValueType(0);
3645   if (VT.isVector() || !VT.isInteger())
3646     return SDValue();
3647
3648   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3649     return SDValue();
3650
3651   // If DIVREM is going to get expanded into a libcall,
3652   // but there is no libcall available, then don't combine.
3653   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3654       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3655     return SDValue();
3656
3657   // If div is legal, it's better to do the normal expansion
       // OtherOpcode is the counterpart of Opcode: the remainder opcode when
       // Node is a division and the division opcode otherwise. In both arms the
       // legality check is made on the division opcode.
3658   unsigned OtherOpcode = 0;
3659   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3660     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3661     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3662       return SDValue();
3663   } else {
3664     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3665     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3666       return SDValue();
3667   }
3668
3669   SDValue Op0 = Node->getOperand(0);
3670   SDValue Op1 = Node->getOperand(1);
       // The DIVREM value shared by all rewritten users; stays empty until the
       // first counterpart or existing DIVREM user is seen.
3671   SDValue combined;
       // Walk every user of the dividend looking for div/rem/divrem nodes that
       // use the same (Op0, Op1) operand pair.
3672   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3673          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3674     SDNode *User = *UI;
3675     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3676         User->use_empty())
3677       continue;
3678     // Convert the other matching node(s), too;
3679     // otherwise, the DIVREM may get target-legalized into something
3680     // target-specific that we won't be able to recognize.
3681     unsigned UserOpc = User->getOpcode();
3682     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3683         User->getOperand(0) == Op0 &&
3684         User->getOperand(1) == Op1) {
3685       if (!combined) {
3686         if (UserOpc == OtherOpcode) {
               // First counterpart found: build the DIVREM node; both of its
               // results have type VT.
3687           SDVTList VTs = DAG.getVTList(VT, VT);
3688           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3689         } else if (UserOpc == DivRemOpc) {
               // An equivalent DIVREM already exists: reuse it.
3690           combined = SDValue(User, 0);
3691         } else {
               // Same opcode as Node and no DIVREM available yet: this user is
               // left untouched.
3692           assert(UserOpc == Opcode);
3693           continue;
3694         }
3695       }
           // Divisions take value 0 of the DIVREM node, remainders value 1.
3696       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3697         CombineTo(User, combined);
3698       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3699         CombineTo(User, combined.getValue(1));
3700     }
3701   }
3702   return combined;
3703 }
3704
3705 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3706   SDValue N0 = N->getOperand(0);
3707   SDValue N1 = N->getOperand(1);
3708   EVT VT = N->getValueType(0);
3709   SDLoc DL(N);
3710
3711   unsigned Opc = N->getOpcode();
3712   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3713   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3714
3715   // X / undef -> undef
3716   // X % undef -> undef
3717   // X / 0 -> undef
3718   // X % 0 -> undef
3719   // NOTE: This includes vectors where any divisor element is zero/undef.
3720   if (DAG.isUndef(Opc, {N0, N1}))
3721     return DAG.getUNDEF(VT);
3722
3723   // undef / X -> 0
3724   // undef % X -> 0
3725   if (N0.isUndef())
3726     return DAG.getConstant(0, DL, VT);
3727
3728   // 0 / X -> 0
3729   // 0 % X -> 0
3730   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3731   if (N0C && N0C->isNullValue())
3732     return N0;
3733
3734   // X / X -> 1
3735   // X % X -> 0
3736   if (N0 == N1)
3737     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3738
3739   // X / 1 -> X
3740   // X % 1 -> 0
3741   // If this is a boolean op (single-bit element type), we can't have
3742   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3743   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3744   // it's a 1.
3745   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3746     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3747
3748   return SDValue();
3749 }
3750
3751 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3752   SDValue N0 = N->getOperand(0);
3753   SDValue N1 = N->getOperand(1);
3754   EVT VT = N->getValueType(0);
3755   EVT CCVT = getSetCCResultType(VT);
3756
3757   // fold vector ops
3758   if (VT.isVector())
3759     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3760       return FoldedVOp;
3761
3762   SDLoc DL(N);
3763
3764   // fold (sdiv c1, c2) -> c1/c2
3765   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3766   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3767   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3768     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3769   // fold (sdiv X, -1) -> 0-X
3770   if (N1C && N1C->isAllOnesValue())
3771     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3772   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3773   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3774     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3775                          DAG.getConstant(1, DL, VT),
3776                          DAG.getConstant(0, DL, VT));
3777
3778   if (SDValue V = simplifyDivRem(N, DAG))
3779     return V;
3780
3781   if (SDValue NewSel = foldBinOpIntoSelect(N))
3782     return NewSel;
3783
3784   // If we know the sign bits of both operands are zero, strength reduce to a
3785   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3786   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3787     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3788
3789   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3790     // If the corresponding remainder node exists, update its users with
3791     // (Dividend - (Quotient * Divisor).
3792     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3793                                               { N0, N1 })) {
3794       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3795       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3796       AddToWorklist(Mul.getNode());
3797       AddToWorklist(Sub.getNode());
3798       CombineTo(RemNode, Sub);
3799     }
3800     return V;
3801   }
3802
3803   // sdiv, srem -> sdivrem
3804   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3805   // true.  Otherwise, we break the simplification logic in visitREM().
3806   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3807   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3808     if (SDValue DivRem = useDivRem(N))
3809         return DivRem;
3810
3811   return SDValue();
3812 }
3813
3814 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3815   SDLoc DL(N);
3816   EVT VT = N->getValueType(0);
3817   EVT CCVT = getSetCCResultType(VT);
3818   unsigned BitWidth = VT.getScalarSizeInBits();
3819
3820   // Helper for determining whether a value is a power-2 constant scalar or a
3821   // vector of such elements.
3822   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3823     if (C->isNullValue() || C->isOpaque())
3824       return false;
3825     if (C->getAPIntValue().isPowerOf2())
3826       return true;
3827     if ((-C->getAPIntValue()).isPowerOf2())
3828       return true;
3829     return false;
3830   };
3831
3832   // fold (sdiv X, pow2) -> simple ops after legalize
3833   // FIXME: We check for the exact bit here because the generic lowering gives
3834   // better results in that case. The target-specific lowering should learn how
3835   // to handle exact sdivs efficiently.
3836   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3837     // Target-specific implementation of sdiv x, pow2.
3838     if (SDValue Res = BuildSDIVPow2(N))
3839       return Res;
3840
3841     // Create constants that are functions of the shift amount value.
3842     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3843     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3844     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3845     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3846     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3847     if (!isConstantOrConstantVector(Inexact))
3848       return SDValue();
3849
3850     // Splat the sign bit into the register
3851     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3852                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3853     AddToWorklist(Sign.getNode());
3854
3855     // Add (N0 < 0) ? abs2 - 1 : 0;
3856     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3857     AddToWorklist(Srl.getNode());
3858     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3859     AddToWorklist(Add.getNode());
3860     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3861     AddToWorklist(Sra.getNode());
3862
3863     // Special case: (sdiv X, 1) -> X
3864     // Special Case: (sdiv X, -1) -> 0-X
3865     SDValue One = DAG.getConstant(1, DL, VT);
3866     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3867     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3868     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3869     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3870     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3871
3872     // If dividing by a positive value, we're done. Otherwise, the result must
3873     // be negated.
3874     SDValue Zero = DAG.getConstant(0, DL, VT);
3875     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3876
3877     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3878     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3879     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3880     return Res;
3881   }
3882
3883   // If integer divide is expensive and we satisfy the requirements, emit an
3884   // alternate sequence.  Targets may check function attributes for size/speed
3885   // trade-offs.
3886   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3887   if (isConstantOrConstantVector(N1) &&
3888       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3889     if (SDValue Op = BuildSDIV(N))
3890       return Op;
3891
3892   return SDValue();
3893 }
3894
3895 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3896   SDValue N0 = N->getOperand(0);
3897   SDValue N1 = N->getOperand(1);
3898   EVT VT = N->getValueType(0);
3899   EVT CCVT = getSetCCResultType(VT);
3900
3901   // fold vector ops
3902   if (VT.isVector())
3903     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3904       return FoldedVOp;
3905
3906   SDLoc DL(N);
3907
3908   // fold (udiv c1, c2) -> c1/c2
3909   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3910   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3911   if (N0C && N1C)
3912     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3913                                                     N0C, N1C))
3914       return Folded;
3915   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3916   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3917     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3918                          DAG.getConstant(1, DL, VT),
3919                          DAG.getConstant(0, DL, VT));
3920
3921   if (SDValue V = simplifyDivRem(N, DAG))
3922     return V;
3923
3924   if (SDValue NewSel = foldBinOpIntoSelect(N))
3925     return NewSel;
3926
3927   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3928     // If the corresponding remainder node exists, update its users with
3929     // (Dividend - (Quotient * Divisor).
3930     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3931                                               { N0, N1 })) {
3932       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3933       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3934       AddToWorklist(Mul.getNode());
3935       AddToWorklist(Sub.getNode());
3936       CombineTo(RemNode, Sub);
3937     }
3938     return V;
3939   }
3940
3941   // sdiv, srem -> sdivrem
3942   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3943   // true.  Otherwise, we break the simplification logic in visitREM().
3944   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3945   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3946     if (SDValue DivRem = useDivRem(N))
3947         return DivRem;
3948
3949   return SDValue();
3950 }
3951
3952 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3953   SDLoc DL(N);
3954   EVT VT = N->getValueType(0);
3955
3956   // fold (udiv x, (1 << c)) -> x >>u c
3957   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3958       DAG.isKnownToBeAPowerOfTwo(N1)) {
3959     SDValue LogBase2 = BuildLogBase2(N1, DL);
3960     AddToWorklist(LogBase2.getNode());
3961
3962     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3963     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3964     AddToWorklist(Trunc.getNode());
3965     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3966   }
3967
3968   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3969   if (N1.getOpcode() == ISD::SHL) {
3970     SDValue N10 = N1.getOperand(0);
3971     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3972         DAG.isKnownToBeAPowerOfTwo(N10)) {
3973       SDValue LogBase2 = BuildLogBase2(N10, DL);
3974       AddToWorklist(LogBase2.getNode());
3975
3976       EVT ADDVT = N1.getOperand(1).getValueType();
3977       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3978       AddToWorklist(Trunc.getNode());
3979       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3980       AddToWorklist(Add.getNode());
3981       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3982     }
3983   }
3984
3985   // fold (udiv x, c) -> alternate
3986   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3987   if (isConstantOrConstantVector(N1) &&
3988       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3989     if (SDValue Op = BuildUDIV(N))
3990       return Op;
3991
3992   return SDValue();
3993 }
3994
3995 // handles ISD::SREM and ISD::UREM
3996 SDValue DAGCombiner::visitREM(SDNode *N) {
3997   unsigned Opcode = N->getOpcode();
3998   SDValue N0 = N->getOperand(0);
3999   SDValue N1 = N->getOperand(1);
4000   EVT VT = N->getValueType(0);
4001   EVT CCVT = getSetCCResultType(VT);
4002
4003   bool isSigned = (Opcode == ISD::SREM);
4004   SDLoc DL(N);
4005
4006   // fold (rem c1, c2) -> c1%c2
4007   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4008   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4009   if (N0C && N1C)
4010     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
4011       return Folded;
4012   // fold (urem X, -1) -> select(X == -1, 0, x)
4013   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4014     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4015                          DAG.getConstant(0, DL, VT), N0);
4016
4017   if (SDValue V = simplifyDivRem(N, DAG))
4018     return V;
4019
4020   if (SDValue NewSel = foldBinOpIntoSelect(N))
4021     return NewSel;
4022
4023   if (isSigned) {
4024     // If we know the sign bits of both operands are zero, strength reduce to a
4025     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4026     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4027       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4028   } else {
4029     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4030     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4031       // fold (urem x, pow2) -> (and x, pow2-1)
4032       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4033       AddToWorklist(Add.getNode());
4034       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4035     }
4036     if (N1.getOpcode() == ISD::SHL &&
4037         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4038       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4039       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4040       AddToWorklist(Add.getNode());
4041       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4042     }
4043   }
4044
4045   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4046
4047   // If X/C can be simplified by the division-by-constant logic, lower
4048   // X%C to the equivalent of X-X/C*C.
4049   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4050   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4051   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4052   // combine will not return a DIVREM.  Regardless, checking cheapness here
4053   // makes sense since the simplification results in fatter code.
4054   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4055     SDValue OptimizedDiv =
4056         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4057     if (OptimizedDiv.getNode()) {
4058       // If the equivalent Div node also exists, update its users.
4059       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4060       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4061                                                 { N0, N1 }))
4062         CombineTo(DivNode, OptimizedDiv);
4063       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4064       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4065       AddToWorklist(OptimizedDiv.getNode());
4066       AddToWorklist(Mul.getNode());
4067       return Sub;
4068     }
4069   }
4070
4071   // sdiv, srem -> sdivrem
4072   if (SDValue DivRem = useDivRem(N))
4073     return DivRem.getValue(1);
4074
4075   return SDValue();
4076 }
4077
4078 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4079   SDValue N0 = N->getOperand(0);
4080   SDValue N1 = N->getOperand(1);
4081   EVT VT = N->getValueType(0);
4082   SDLoc DL(N);
4083
4084   if (VT.isVector()) {
4085     // fold (mulhs x, 0) -> 0
4086     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4087       return N1;
4088     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4089       return N0;
4090   }
4091
4092   // fold (mulhs x, 0) -> 0
4093   if (isNullConstant(N1))
4094     return N1;
4095   // fold (mulhs x, 1) -> (sra x, size(x)-1)
4096   if (isOneConstant(N1))
4097     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4098                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
4099                                        getShiftAmountTy(N0.getValueType())));
4100
4101   // fold (mulhs x, undef) -> 0
4102   if (N0.isUndef() || N1.isUndef())
4103     return DAG.getConstant(0, DL, VT);
4104
4105   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4106   // plus a shift.
4107   if (VT.isSimple() && !VT.isVector()) {
4108     MVT Simple = VT.getSimpleVT();
4109     unsigned SimpleSize = Simple.getSizeInBits();
4110     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4111     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4112       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4113       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4114       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4115       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4116             DAG.getConstant(SimpleSize, DL,
4117                             getShiftAmountTy(N1.getValueType())));
4118       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4119     }
4120   }
4121
4122   return SDValue();
4123 }
4124
4125 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4126   SDValue N0 = N->getOperand(0);
4127   SDValue N1 = N->getOperand(1);
4128   EVT VT = N->getValueType(0);
4129   SDLoc DL(N);
4130
4131   if (VT.isVector()) {
4132     // fold (mulhu x, 0) -> 0
4133     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4134       return N1;
4135     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4136       return N0;
4137   }
4138
4139   // fold (mulhu x, 0) -> 0
4140   if (isNullConstant(N1))
4141     return N1;
4142   // fold (mulhu x, 1) -> 0
4143   if (isOneConstant(N1))
4144     return DAG.getConstant(0, DL, N0.getValueType());
4145   // fold (mulhu x, undef) -> 0
4146   if (N0.isUndef() || N1.isUndef())
4147     return DAG.getConstant(0, DL, VT);
4148
4149   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4150   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4151       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4152     unsigned NumEltBits = VT.getScalarSizeInBits();
4153     SDValue LogBase2 = BuildLogBase2(N1, DL);
4154     SDValue SRLAmt = DAG.getNode(
4155         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4156     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4157     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4158     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4159   }
4160
4161   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4162   // plus a shift.
4163   if (VT.isSimple() && !VT.isVector()) {
4164     MVT Simple = VT.getSimpleVT();
4165     unsigned SimpleSize = Simple.getSizeInBits();
4166     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4167     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4168       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4169       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4170       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4171       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4172             DAG.getConstant(SimpleSize, DL,
4173                             getShiftAmountTy(N1.getValueType())));
4174       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4175     }
4176   }
4177
4178   return SDValue();
4179 }
4180
4181 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4182 /// give the opcodes for the two computations that are being performed. Return
4183 /// true if a simplification was made.
4184 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4185                                                 unsigned HiOp) {
4186   // If the high half is not needed, just compute the low half.
4187   bool HiExists = N->hasAnyUseOfValue(1);
4188   if (!HiExists && (!LegalOperations ||
4189                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4190     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4191     return CombineTo(N, Res, Res);
4192   }
4193
4194   // If the low half is not needed, just compute the high half.
4195   bool LoExists = N->hasAnyUseOfValue(0);
4196   if (!LoExists && (!LegalOperations ||
4197                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4198     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4199     return CombineTo(N, Res, Res);
4200   }
4201
4202   // If both halves are used, return as it is.
4203   if (LoExists && HiExists)
4204     return SDValue();
4205
4206   // If the two computed results can be simplified separately, separate them.
4207   if (LoExists) {
4208     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4209     AddToWorklist(Lo.getNode());
4210     SDValue LoOpt = combine(Lo.getNode());
4211     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4212         (!LegalOperations ||
4213          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4214       return CombineTo(N, LoOpt, LoOpt);
4215   }
4216
4217   if (HiExists) {
4218     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4219     AddToWorklist(Hi.getNode());
4220     SDValue HiOpt = combine(Hi.getNode());
4221     if (HiOpt.getNode() && HiOpt != Hi &&
4222         (!LegalOperations ||
4223          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4224       return CombineTo(N, HiOpt, HiOpt);
4225   }
4226
4227   return SDValue();
4228 }
4229
4230 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4231   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4232     return Res;
4233
4234   EVT VT = N->getValueType(0);
4235   SDLoc DL(N);
4236
4237   // If the type is twice as wide is legal, transform the mulhu to a wider
4238   // multiply plus a shift.
4239   if (VT.isSimple() && !VT.isVector()) {
4240     MVT Simple = VT.getSimpleVT();
4241     unsigned SimpleSize = Simple.getSizeInBits();
4242     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4243     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4244       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4245       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4246       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4247       // Compute the high part as N1.
4248       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4249             DAG.getConstant(SimpleSize, DL,
4250                             getShiftAmountTy(Lo.getValueType())));
4251       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4252       // Compute the low part as N0.
4253       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4254       return CombineTo(N, Lo, Hi);
4255     }
4256   }
4257
4258   return SDValue();
4259 }
4260
4261 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4262   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4263     return Res;
4264
4265   EVT VT = N->getValueType(0);
4266   SDLoc DL(N);
4267
4268   // If the type is twice as wide is legal, transform the mulhu to a wider
4269   // multiply plus a shift.
4270   if (VT.isSimple() && !VT.isVector()) {
4271     MVT Simple = VT.getSimpleVT();
4272     unsigned SimpleSize = Simple.getSizeInBits();
4273     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4274     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4275       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4276       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4277       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4278       // Compute the high part as N1.
4279       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4280             DAG.getConstant(SimpleSize, DL,
4281                             getShiftAmountTy(Lo.getValueType())));
4282       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4283       // Compute the low part as N0.
4284       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4285       return CombineTo(N, Lo, Hi);
4286     }
4287   }
4288
4289   return SDValue();
4290 }
4291
4292 SDValue DAGCombiner::visitMULO(SDNode *N) {
4293   bool IsSigned = (ISD::SMULO == N->getOpcode());
4294
4295   // (mulo x, 2) -> (addo x, x)
4296   if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
4297     if (C2->getAPIntValue() == 2)
4298       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
4299                          N->getVTList(), N->getOperand(0), N->getOperand(0));
4300
4301   return SDValue();
4302 }
4303
4304 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4305   SDValue N0 = N->getOperand(0);
4306   SDValue N1 = N->getOperand(1);
4307   EVT VT = N0.getValueType();
4308
4309   // fold vector ops
4310   if (VT.isVector())
4311     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4312       return FoldedVOp;
4313
4314   // fold operation with constant operands.
4315   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4316   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4317   if (N0C && N1C)
4318     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4319
4320   // canonicalize constant to RHS
4321   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4322      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4323     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4324
4325   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4326   // Only do this if the current op isn't legal and the flipped is.
4327   unsigned Opcode = N->getOpcode();
4328   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4329   if (!TLI.isOperationLegal(Opcode, VT) &&
4330       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4331       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4332     unsigned AltOpcode;
4333     switch (Opcode) {
4334     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4335     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4336     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4337     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4338     default: llvm_unreachable("Unknown MINMAX opcode");
4339     }
4340     if (TLI.isOperationLegal(AltOpcode, VT))
4341       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4342   }
4343
4344   return SDValue();
4345 }
4346
4347 /// If this is a bitwise logic instruction and both operands have the same
4348 /// opcode, try to sink the other opcode after the logic instruction.
4349 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4350   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4351   EVT VT = N0.getValueType();
4352   unsigned LogicOpcode = N->getOpcode();
4353   unsigned HandOpcode = N0.getOpcode();
4354   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4355           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4356   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4357
4358   // Bail early if none of these transforms apply.
4359   if (N0.getNumOperands() == 0)
4360     return SDValue();
4361
4362   // FIXME: We should check number of uses of the operands to not increase
4363   //        the instruction count for all transforms.
4364
4365   // Handle size-changing casts.
4366   SDValue X = N0.getOperand(0);
4367   SDValue Y = N1.getOperand(0);
4368   EVT XVT = X.getValueType();
4369   SDLoc DL(N);
4370   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4371       HandOpcode == ISD::SIGN_EXTEND) {
4372     // If both operands have other uses, this transform would create extra
4373     // instructions without eliminating anything.
4374     if (!N0.hasOneUse() && !N1.hasOneUse())
4375       return SDValue();
4376     // We need matching integer source types.
4377     if (XVT != Y.getValueType())
4378       return SDValue();
4379     // Don't create an illegal op during or after legalization. Don't ever
4380     // create an unsupported vector op.
4381     if ((VT.isVector() || LegalOperations) &&
4382         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4383       return SDValue();
4384     // Avoid infinite looping with PromoteIntBinOp.
4385     // TODO: Should we apply desirable/legal constraints to all opcodes?
4386     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4387         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4388       return SDValue();
4389     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4390     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4391     return DAG.getNode(HandOpcode, DL, VT, Logic);
4392   }
4393
4394   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4395   if (HandOpcode == ISD::TRUNCATE) {
4396     // If both operands have other uses, this transform would create extra
4397     // instructions without eliminating anything.
4398     if (!N0.hasOneUse() && !N1.hasOneUse())
4399       return SDValue();
4400     // We need matching source types.
4401     if (XVT != Y.getValueType())
4402       return SDValue();
4403     // Don't create an illegal op during or after legalization.
4404     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4405       return SDValue();
4406     // Be extra careful sinking truncate. If it's free, there's no benefit in
4407     // widening a binop. Also, don't create a logic op on an illegal type.
4408     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4409       return SDValue();
4410     if (!TLI.isTypeLegal(XVT))
4411       return SDValue();
4412     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4413     return DAG.getNode(HandOpcode, DL, VT, Logic);
4414   }
4415
4416   // For binops SHL/SRL/SRA/AND:
4417   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4418   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4419        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4420       N0.getOperand(1) == N1.getOperand(1)) {
4421     // If either operand has other uses, this transform is not an improvement.
4422     if (!N0.hasOneUse() || !N1.hasOneUse())
4423       return SDValue();
4424     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4425     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4426   }
4427
4428   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4429   if (HandOpcode == ISD::BSWAP) {
4430     // If either operand has other uses, this transform is not an improvement.
4431     if (!N0.hasOneUse() || !N1.hasOneUse())
4432       return SDValue();
4433     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4434     return DAG.getNode(HandOpcode, DL, VT, Logic);
4435   }
4436
4437   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4438   // Only perform this optimization up until type legalization, before
4439   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4440   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4441   // we don't want to undo this promotion.
4442   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4443   // on scalars.
4444   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4445        Level <= AfterLegalizeTypes) {
4446     // Input types must be integer and the same.
4447     if (XVT.isInteger() && XVT == Y.getValueType()) {
4448       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4449       return DAG.getNode(HandOpcode, DL, VT, Logic);
4450     }
4451   }
4452
4453   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4454   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4455   // If both shuffles use the same mask, and both shuffle within a single
4456   // vector, then it is worthwhile to move the swizzle after the operation.
4457   // The type-legalizer generates this pattern when loading illegal
4458   // vector types from memory. In many cases this allows additional shuffle
4459   // optimizations.
4460   // There are other cases where moving the shuffle after the xor/and/or
4461   // is profitable even if shuffles don't perform a swizzle.
4462   // If both shuffles use the same mask, and both shuffles have the same first
4463   // or second operand, then it might still be profitable to move the shuffle
4464   // after the xor/and/or operation.
4465   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4466     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4467     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4468     assert(X.getValueType() == Y.getValueType() &&
4469            "Inputs to shuffles are not the same type");
4470
4471     // Check that both shuffles use the same mask. The masks are known to be of
4472     // the same length because the result vector type is the same.
4473     // Check also that shuffles have only one use to avoid introducing extra
4474     // instructions.
4475     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4476         !SVN0->getMask().equals(SVN1->getMask()))
4477       return SDValue();
4478
4479     // Don't try to fold this node if it requires introducing a
4480     // build vector of all zeros that might be illegal at this stage.
4481     SDValue ShOp = N0.getOperand(1);
4482     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4483       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4484
4485     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4486     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4487       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4488                                   N0.getOperand(0), N1.getOperand(0));
4489       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4490     }
4491
4492     // Don't try to fold this node if it requires introducing a
4493     // build vector of all zeros that might be illegal at this stage.
4494     ShOp = N0.getOperand(0);
4495     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4496       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4497
4498     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4499     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4500       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4501                                   N1.getOperand(1));
4502       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4503     }
4504   }
4505
4506   return SDValue();
4507 }
4508
4509 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4510 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4511                                        const SDLoc &DL) {
4512   SDValue LL, LR, RL, RR, N0CC, N1CC;
4513   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4514       !isSetCCEquivalent(N1, RL, RR, N1CC))
4515     return SDValue();
4516
4517   assert(N0.getValueType() == N1.getValueType() &&
4518          "Unexpected operand types for bitwise logic op");
4519   assert(LL.getValueType() == LR.getValueType() &&
4520          RL.getValueType() == RR.getValueType() &&
4521          "Unexpected operand types for setcc");
4522
4523   // If we're here post-legalization or the logic op type is not i1, the logic
4524   // op type must match a setcc result type. Also, all folds require new
4525   // operations on the left and right operands, so those types must match.
4526   EVT VT = N0.getValueType();
4527   EVT OpVT = LL.getValueType();
4528   if (LegalOperations || VT.getScalarType() != MVT::i1)
4529     if (VT != getSetCCResultType(OpVT))
4530       return SDValue();
4531   if (OpVT != RL.getValueType())
4532     return SDValue();
4533
4534   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4535   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4536   bool IsInteger = OpVT.isInteger();
4537   if (LR == RR && CC0 == CC1 && IsInteger) {
4538     bool IsZero = isNullOrNullSplat(LR);
4539     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4540
4541     // All bits clear?
4542     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4543     // All sign bits clear?
4544     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4545     // Any bits set?
4546     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4547     // Any sign bits set?
4548     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4549
4550     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4551     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4552     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4553     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4554     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4555       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4556       AddToWorklist(Or.getNode());
4557       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4558     }
4559
4560     // All bits set?
4561     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4562     // All sign bits set?
4563     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4564     // Any bits clear?
4565     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4566     // Any sign bits clear?
4567     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4568
4569     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4570     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4571     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4572     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4573     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4574       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4575       AddToWorklist(And.getNode());
4576       return DAG.getSetCC(DL, VT, And, LR, CC1);
4577     }
4578   }
4579
4580   // TODO: What is the 'or' equivalent of this fold?
4581   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4582   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4583       IsInteger && CC0 == ISD::SETNE &&
4584       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4585        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4586     SDValue One = DAG.getConstant(1, DL, OpVT);
4587     SDValue Two = DAG.getConstant(2, DL, OpVT);
4588     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4589     AddToWorklist(Add.getNode());
4590     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4591   }
4592
4593   // Try more general transforms if the predicates match and the only user of
4594   // the compares is the 'and' or 'or'.
4595   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4596       N0.hasOneUse() && N1.hasOneUse()) {
4597     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4598     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4599     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4600       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4601       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4602       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4603       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4604       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4605     }
4606
4607     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4608     // TODO - support non-uniform vector amounts.
4609     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4610       // Match a shared variable operand and 2 non-opaque constant operands.
4611       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4612       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4613       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4614         // Canonicalize larger constant as C0.
4615         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4616           std::swap(C0, C1);
4617
4618         // The difference of the constants must be a single bit.
4619         const APInt &C0Val = C0->getAPIntValue();
4620         const APInt &C1Val = C1->getAPIntValue();
4621         if ((C0Val - C1Val).isPowerOf2()) {
4622           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4623           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4624           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4625           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4626           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4627           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4628           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4629           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4630         }
4631       }
4632     }
4633   }
4634
4635   // Canonicalize equivalent operands to LL == RL.
4636   if (LL == RR && LR == RL) {
4637     CC1 = ISD::getSetCCSwappedOperands(CC1);
4638     std::swap(RL, RR);
4639   }
4640
4641   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4642   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4643   if (LL == RL && LR == RR) {
4644     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4645                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4646     if (NewCC != ISD::SETCC_INVALID &&
4647         (!LegalOperations ||
4648          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4649           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4650       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4651   }
4652
4653   return SDValue();
4654 }
4655
4656 /// This contains all DAGCombine rules which reduce two values combined by
4657 /// an And operation to a single value. This makes them reusable in the context
4658 /// of visitSELECT(). Rules involving constants are not included as
4659 /// visitSELECT() already handles those cases.
4660 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4661   EVT VT = N1.getValueType();
4662   SDLoc DL(N);
4663
4664   // fold (and x, undef) -> 0
4665   if (N0.isUndef() || N1.isUndef())
4666     return DAG.getConstant(0, DL, VT);
4667
4668   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4669     return V;
4670
4671   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4672       VT.getSizeInBits() <= 64) {
4673     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4674       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4675         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4676         // immediate for an add, but it is legal if its top c2 bits are set,
4677         // transform the ADD so the immediate doesn't need to be materialized
4678         // in a register.
4679         APInt ADDC = ADDI->getAPIntValue();
4680         APInt SRLC = SRLI->getAPIntValue();
4681         if (ADDC.getMinSignedBits() <= 64 &&
4682             SRLC.ult(VT.getSizeInBits()) &&
4683             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4684           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4685                                              SRLC.getZExtValue());
4686           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4687             ADDC |= Mask;
4688             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4689               SDLoc DL0(N0);
4690               SDValue NewAdd =
4691                 DAG.getNode(ISD::ADD, DL0, VT,
4692                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4693               CombineTo(N0.getNode(), NewAdd);
4694               // Return N so it doesn't get rechecked!
4695               return SDValue(N, 0);
4696             }
4697           }
4698         }
4699       }
4700     }
4701   }
4702
4703   // Reduce bit extract of low half of an integer to the narrower type.
4704   // (and (srl i64:x, K), KMask) ->
4705   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4706   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4707     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4708       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4709         unsigned Size = VT.getSizeInBits();
4710         const APInt &AndMask = CAnd->getAPIntValue();
4711         unsigned ShiftBits = CShift->getZExtValue();
4712
4713         // Bail out, this node will probably disappear anyway.
4714         if (ShiftBits == 0)
4715           return SDValue();
4716
4717         unsigned MaskBits = AndMask.countTrailingOnes();
4718         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4719
4720         if (AndMask.isMask() &&
4721             // Required bits must not span the two halves of the integer and
4722             // must fit in the half size type.
4723             (ShiftBits + MaskBits <= Size / 2) &&
4724             TLI.isNarrowingProfitable(VT, HalfVT) &&
4725             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4726             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4727             TLI.isTruncateFree(VT, HalfVT) &&
4728             TLI.isZExtFree(HalfVT, VT)) {
4729           // The isNarrowingProfitable is to avoid regressions on PPC and
4730           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4731           // on downstream users of this. Those patterns could probably be
4732           // extended to handle extensions mixed in.
4733
4734           SDValue SL(N0);
4735           assert(MaskBits <= Size);
4736
4737           // Extracting the highest bit of the low half.
4738           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4739           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4740                                       N0.getOperand(0));
4741
4742           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4743           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4744           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4745           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4746           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4747         }
4748       }
4749     }
4750   }
4751
4752   return SDValue();
4753 }
4754
4755 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4756                                    EVT LoadResultTy, EVT &ExtVT) {
4757   if (!AndC->getAPIntValue().isMask())
4758     return false;
4759
4760   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4761
4762   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4763   EVT LoadedVT = LoadN->getMemoryVT();
4764
4765   if (ExtVT == LoadedVT &&
4766       (!LegalOperations ||
4767        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4768     // ZEXTLOAD will match without needing to change the size of the value being
4769     // loaded.
4770     return true;
4771   }
4772
4773   // Do not change the width of a volatile load.
4774   if (LoadN->isVolatile())
4775     return false;
4776
4777   // Do not generate loads of non-round integer types since these can
4778   // be expensive (and would be wrong if the type is not byte sized).
4779   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4780     return false;
4781
4782   if (LegalOperations &&
4783       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4784     return false;
4785
4786   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4787     return false;
4788
4789   return true;
4790 }
4791
4792 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4793                                     ISD::LoadExtType ExtType, EVT &MemVT,
4794                                     unsigned ShAmt) {
4795   if (!LDST)
4796     return false;
4797   // Only allow byte offsets.
4798   if (ShAmt % 8)
4799     return false;
4800
4801   // Do not generate loads of non-round integer types since these can
4802   // be expensive (and would be wrong if the type is not byte sized).
4803   if (!MemVT.isRound())
4804     return false;
4805
4806   // Don't change the width of a volatile load.
4807   if (LDST->isVolatile())
4808     return false;
4809
4810   // Verify that we are actually reducing a load width here.
4811   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4812     return false;
4813
4814   // Ensure that this isn't going to produce an unsupported unaligned access.
4815   if (ShAmt &&
4816       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4817                               LDST->getAddressSpace(), ShAmt / 8,
4818                               LDST->getMemOperand()->getFlags()))
4819     return false;
4820
4821   // It's not possible to generate a constant of extended or untyped type.
4822   EVT PtrType = LDST->getBasePtr().getValueType();
4823   if (PtrType == MVT::Untyped || PtrType.isExtended())
4824     return false;
4825
4826   if (isa<LoadSDNode>(LDST)) {
4827     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4828     // Don't transform one with multiple uses, this would require adding a new
4829     // load.
4830     if (!SDValue(Load, 0).hasOneUse())
4831       return false;
4832
4833     if (LegalOperations &&
4834         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4835       return false;
4836
4837     // For the transform to be legal, the load must produce only two values
4838     // (the value loaded and the chain).  Don't transform a pre-increment
4839     // load, for example, which produces an extra value.  Otherwise the
4840     // transformation is not equivalent, and the downstream logic to replace
4841     // uses gets things wrong.
4842     if (Load->getNumValues() > 2)
4843       return false;
4844
4845     // If the load that we're shrinking is an extload and we're not just
4846     // discarding the extension we can't simply shrink the load. Bail.
4847     // TODO: It would be possible to merge the extensions in some cases.
4848     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4849         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4850       return false;
4851
4852     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4853       return false;
4854   } else {
4855     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4856     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4857     // Can't write outside the original store
4858     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4859       return false;
4860
4861     if (LegalOperations &&
4862         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4863       return false;
4864   }
4865   return true;
4866 }
4867
4868 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4869                                     SmallVectorImpl<LoadSDNode*> &Loads,
4870                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4871                                     ConstantSDNode *Mask,
4872                                     SDNode *&NodeToMask) {
4873   // Recursively search for the operands, looking for loads which can be
4874   // narrowed.
4875   for (SDValue Op : N->op_values()) {
4876     if (Op.getValueType().isVector())
4877       return false;
4878
4879     // Some constants may need fixing up later if they are too large.
4880     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4881       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4882           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4883         NodesWithConsts.insert(N);
4884       continue;
4885     }
4886
4887     if (!Op.hasOneUse())
4888       return false;
4889
4890     switch(Op.getOpcode()) {
4891     case ISD::LOAD: {
4892       auto *Load = cast<LoadSDNode>(Op);
4893       EVT ExtVT;
4894       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4895           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4896
4897         // ZEXTLOAD is already small enough.
4898         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4899             ExtVT.bitsGE(Load->getMemoryVT()))
4900           continue;
4901
4902         // Use LE to convert equal sized loads to zext.
4903         if (ExtVT.bitsLE(Load->getMemoryVT()))
4904           Loads.push_back(Load);
4905
4906         continue;
4907       }
4908       return false;
4909     }
4910     case ISD::ZERO_EXTEND:
4911     case ISD::AssertZext: {
4912       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4913       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4914       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4915         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4916         Op.getOperand(0).getValueType();
4917
4918       // We can accept extending nodes if the mask is wider or an equal
4919       // width to the original type.
4920       if (ExtVT.bitsGE(VT))
4921         continue;
4922       break;
4923     }
4924     case ISD::OR:
4925     case ISD::XOR:
4926     case ISD::AND:
4927       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4928                              NodeToMask))
4929         return false;
4930       continue;
4931     }
4932
4933     // Allow one node which will masked along with any loads found.
4934     if (NodeToMask)
4935       return false;
4936
4937     // Also ensure that the node to be masked only produces one data result.
4938     NodeToMask = Op.getNode();
4939     if (NodeToMask->getNumValues() > 1) {
4940       bool HasValue = false;
4941       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4942         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4943         if (VT != MVT::Glue && VT != MVT::Other) {
4944           if (HasValue) {
4945             NodeToMask = nullptr;
4946             return false;
4947           }
4948           HasValue = true;
4949         }
4950       }
4951       assert(HasValue && "Node to be masked has no data result?");
4952     }
4953   }
4954   return true;
4955 }
4956
4957 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4958   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4959   if (!Mask)
4960     return false;
4961
4962   if (!Mask->getAPIntValue().isMask())
4963     return false;
4964
4965   // No need to do anything if the and directly uses a load.
4966   if (isa<LoadSDNode>(N->getOperand(0)))
4967     return false;
4968
4969   SmallVector<LoadSDNode*, 8> Loads;
4970   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4971   SDNode *FixupNode = nullptr;
4972   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4973     if (Loads.size() == 0)
4974       return false;
4975
4976     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4977     SDValue MaskOp = N->getOperand(1);
4978
4979     // If it exists, fixup the single node we allow in the tree that needs
4980     // masking.
4981     if (FixupNode) {
4982       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4983       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4984                                 FixupNode->getValueType(0),
4985                                 SDValue(FixupNode, 0), MaskOp);
4986       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4987       if (And.getOpcode() == ISD ::AND)
4988         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4989     }
4990
4991     // Narrow any constants that need it.
4992     for (auto *LogicN : NodesWithConsts) {
4993       SDValue Op0 = LogicN->getOperand(0);
4994       SDValue Op1 = LogicN->getOperand(1);
4995
4996       if (isa<ConstantSDNode>(Op0))
4997           std::swap(Op0, Op1);
4998
4999       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5000                                 Op1, MaskOp);
5001
5002       DAG.UpdateNodeOperands(LogicN, Op0, And);
5003     }
5004
5005     // Create narrow loads.
5006     for (auto *Load : Loads) {
5007       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5008       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5009                                 SDValue(Load, 0), MaskOp);
5010       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5011       if (And.getOpcode() == ISD ::AND)
5012         And = SDValue(
5013             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5014       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5015       assert(NewLoad &&
5016              "Shouldn't be masking the load if it can't be narrowed");
5017       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5018     }
5019     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5020     return true;
5021   }
5022   return false;
5023 }
5024
5025 // Unfold
5026 //    x &  (-1 'logical shift' y)
5027 // To
5028 //    (x 'opposite logical shift' y) 'logical shift' y
5029 // if it is better for performance.
5030 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5031   assert(N->getOpcode() == ISD::AND);
5032
5033   SDValue N0 = N->getOperand(0);
5034   SDValue N1 = N->getOperand(1);
5035
5036   // Do we actually prefer shifts over mask?
5037   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5038     return SDValue();
5039
5040   // Try to match  (-1 '[outer] logical shift' y)
5041   unsigned OuterShift;
5042   unsigned InnerShift; // The opposite direction to the OuterShift.
5043   SDValue Y;           // Shift amount.
5044   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5045     if (!M.hasOneUse())
5046       return false;
5047     OuterShift = M->getOpcode();
5048     if (OuterShift == ISD::SHL)
5049       InnerShift = ISD::SRL;
5050     else if (OuterShift == ISD::SRL)
5051       InnerShift = ISD::SHL;
5052     else
5053       return false;
5054     if (!isAllOnesConstant(M->getOperand(0)))
5055       return false;
5056     Y = M->getOperand(1);
5057     return true;
5058   };
5059
5060   SDValue X;
5061   if (matchMask(N1))
5062     X = N0;
5063   else if (matchMask(N0))
5064     X = N1;
5065   else
5066     return SDValue();
5067
5068   SDLoc DL(N);
5069   EVT VT = N->getValueType(0);
5070
5071   //     tmp = x   'opposite logical shift' y
5072   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5073   //     ret = tmp 'logical shift' y
5074   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5075
5076   return T1;
5077 }
5078
5079 SDValue DAGCombiner::visitAND(SDNode *N) {
5080   SDValue N0 = N->getOperand(0);
5081   SDValue N1 = N->getOperand(1);
5082   EVT VT = N1.getValueType();
5083
5084   // x & x --> x
5085   if (N0 == N1)
5086     return N0;
5087
5088   // fold vector ops
5089   if (VT.isVector()) {
5090     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5091       return FoldedVOp;
5092
5093     // fold (and x, 0) -> 0, vector edition
5094     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5095       // do not return N0, because undef node may exist in N0
5096       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5097                              SDLoc(N), N0.getValueType());
5098     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5099       // do not return N1, because undef node may exist in N1
5100       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5101                              SDLoc(N), N1.getValueType());
5102
5103     // fold (and x, -1) -> x, vector edition
5104     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5105       return N1;
5106     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5107       return N0;
5108   }
5109
5110   // fold (and c1, c2) -> c1&c2
5111   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5112   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5113   if (N0C && N1C && !N1C->isOpaque())
5114     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
5115   // canonicalize constant to RHS
5116   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5117       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5118     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5119   // fold (and x, -1) -> x
5120   if (isAllOnesConstant(N1))
5121     return N0;
5122   // if (and x, c) is known to be zero, return 0
5123   unsigned BitWidth = VT.getScalarSizeInBits();
5124   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5125                                    APInt::getAllOnesValue(BitWidth)))
5126     return DAG.getConstant(0, SDLoc(N), VT);
5127
5128   if (SDValue NewSel = foldBinOpIntoSelect(N))
5129     return NewSel;
5130
5131   // reassociate and
5132   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5133     return RAND;
5134
5135   // Try to convert a constant mask AND into a shuffle clear mask.
5136   if (VT.isVector())
5137     if (SDValue Shuffle = XformToShuffleWithZero(N))
5138       return Shuffle;
5139
5140   // fold (and (or x, C), D) -> D if (C & D) == D
5141   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5142     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5143   };
5144   if (N0.getOpcode() == ISD::OR &&
5145       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5146     return N1;
5147   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5148   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5149     SDValue N0Op0 = N0.getOperand(0);
5150     APInt Mask = ~N1C->getAPIntValue();
5151     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5152     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5153       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5154                                  N0.getValueType(), N0Op0);
5155
5156       // Replace uses of the AND with uses of the Zero extend node.
5157       CombineTo(N, Zext);
5158
5159       // We actually want to replace all uses of the any_extend with the
5160       // zero_extend, to avoid duplicating things.  This will later cause this
5161       // AND to be folded.
5162       CombineTo(N0.getNode(), Zext);
5163       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5164     }
5165   }
5166   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5167   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5168   // already be zero by virtue of the width of the base type of the load.
5169   //
5170   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5171   // more cases.
5172   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5173        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5174        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5175        N0.getOperand(0).getResNo() == 0) ||
5176       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5177     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5178                                          N0 : N0.getOperand(0) );
5179
5180     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5181     // This can be a pure constant or a vector splat, in which case we treat the
5182     // vector as a scalar and use the splat value.
5183     APInt Constant = APInt::getNullValue(1);
5184     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5185       Constant = C->getAPIntValue();
5186     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5187       APInt SplatValue, SplatUndef;
5188       unsigned SplatBitSize;
5189       bool HasAnyUndefs;
5190       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5191                                              SplatBitSize, HasAnyUndefs);
5192       if (IsSplat) {
5193         // Undef bits can contribute to a possible optimisation if set, so
5194         // set them.
5195         SplatValue |= SplatUndef;
5196
5197         // The splat value may be something like "0x00FFFFFF", which means 0 for
5198         // the first vector value and FF for the rest, repeating. We need a mask
5199         // that will apply equally to all members of the vector, so AND all the
5200         // lanes of the constant together.
5201         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5202
5203         // If the splat value has been compressed to a bitlength lower
5204         // than the size of the vector lane, we need to re-expand it to
5205         // the lane size.
5206         if (EltBitWidth > SplatBitSize)
5207           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5208                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5209             SplatValue |= SplatValue.shl(SplatBitSize);
5210
5211         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5212         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5213         if ((SplatBitSize % EltBitWidth) == 0) {
5214           Constant = APInt::getAllOnesValue(EltBitWidth);
5215           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5216             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5217         }
5218       }
5219     }
5220
5221     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5222     // actually legal and isn't going to get expanded, else this is a false
5223     // optimisation.
5224     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5225                                                     Load->getValueType(0),
5226                                                     Load->getMemoryVT());
5227
5228     // Resize the constant to the same size as the original memory access before
5229     // extension. If it is still the AllOnesValue then this AND is completely
5230     // unneeded.
5231     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5232
5233     bool B;
5234     switch (Load->getExtensionType()) {
5235     default: B = false; break;
5236     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5237     case ISD::ZEXTLOAD:
5238     case ISD::NON_EXTLOAD: B = true; break;
5239     }
5240
5241     if (B && Constant.isAllOnesValue()) {
5242       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5243       // preserve semantics once we get rid of the AND.
5244       SDValue NewLoad(Load, 0);
5245
5246       // Fold the AND away. NewLoad may get replaced immediately.
5247       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5248
5249       if (Load->getExtensionType() == ISD::EXTLOAD) {
5250         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5251                               Load->getValueType(0), SDLoc(Load),
5252                               Load->getChain(), Load->getBasePtr(),
5253                               Load->getOffset(), Load->getMemoryVT(),
5254                               Load->getMemOperand());
5255         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5256         if (Load->getNumValues() == 3) {
5257           // PRE/POST_INC loads have 3 values.
5258           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5259                            NewLoad.getValue(2) };
5260           CombineTo(Load, To, 3, true);
5261         } else {
5262           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5263         }
5264       }
5265
5266       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5267     }
5268   }
5269
5270   // fold (and (load x), 255) -> (zextload x, i8)
5271   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5272   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5273   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5274                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5275                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5276     if (SDValue Res = ReduceLoadWidth(N)) {
5277       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5278         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5279       AddToWorklist(N);
5280       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5281       return SDValue(N, 0);
5282     }
5283   }
5284
5285   if (Level >= AfterLegalizeTypes) {
5286     // Attempt to propagate the AND back up to the leaves which, if they're
5287     // loads, can be combined to narrow loads and the AND node can be removed.
5288     // Perform after legalization so that extend nodes will already be
5289     // combined into the loads.
5290     if (BackwardsPropagateMask(N, DAG)) {
5291       return SDValue(N, 0);
5292     }
5293   }
5294
5295   if (SDValue Combined = visitANDLike(N0, N1, N))
5296     return Combined;
5297
5298   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5299   if (N0.getOpcode() == N1.getOpcode())
5300     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5301       return V;
5302
5303   // Masking the negated extension of a boolean is just the zero-extended
5304   // boolean:
5305   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5306   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5307   //
5308   // Note: the SimplifyDemandedBits fold below can make an information-losing
5309   // transform, and then we have no way to find this better fold.
5310   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5311     if (isNullOrNullSplat(N0.getOperand(0))) {
5312       SDValue SubRHS = N0.getOperand(1);
5313       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5314           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5315         return SubRHS;
5316       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5317           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5318         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5319     }
5320   }
5321
5322   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5323   // fold (and (sra)) -> (and (srl)) when possible.
5324   if (SimplifyDemandedBits(SDValue(N, 0)))
5325     return SDValue(N, 0);
5326
5327   // fold (zext_inreg (extload x)) -> (zextload x)
5328   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5329   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5330       (ISD::isEXTLoad(N0.getNode()) ||
5331        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5332     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5333     EVT MemVT = LN0->getMemoryVT();
5334     // If we zero all the possible extended bits, then we can turn this into
5335     // a zextload if we are running before legalize or the operation is legal.
5336     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5337     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5338     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5339     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5340         ((!LegalOperations && !LN0->isVolatile()) ||
5341          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5342       SDValue ExtLoad =
5343           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5344                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5345       AddToWorklist(N);
5346       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5347       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5348     }
5349   }
5350
5351   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5352   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5353     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5354                                            N0.getOperand(1), false))
5355       return BSwap;
5356   }
5357
5358   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5359     return Shifts;
5360
5361   return SDValue();
5362 }
5363
5364 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5365 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5366                                         bool DemandHighBits) {
5367   if (!LegalOperations)
5368     return SDValue();
5369
5370   EVT VT = N->getValueType(0);
5371   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5372     return SDValue();
5373   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5374     return SDValue();
5375
5376   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5377   bool LookPassAnd0 = false;
5378   bool LookPassAnd1 = false;
5379   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5380       std::swap(N0, N1);
5381   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5382       std::swap(N0, N1);
5383   if (N0.getOpcode() == ISD::AND) {
5384     if (!N0.getNode()->hasOneUse())
5385       return SDValue();
5386     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5387     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5388     // This is needed for X86.
5389     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5390                   N01C->getZExtValue() != 0xFFFF))
5391       return SDValue();
5392     N0 = N0.getOperand(0);
5393     LookPassAnd0 = true;
5394   }
5395
5396   if (N1.getOpcode() == ISD::AND) {
5397     if (!N1.getNode()->hasOneUse())
5398       return SDValue();
5399     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5400     if (!N11C || N11C->getZExtValue() != 0xFF)
5401       return SDValue();
5402     N1 = N1.getOperand(0);
5403     LookPassAnd1 = true;
5404   }
5405
5406   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5407     std::swap(N0, N1);
5408   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5409     return SDValue();
5410   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5411     return SDValue();
5412
5413   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5414   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5415   if (!N01C || !N11C)
5416     return SDValue();
5417   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5418     return SDValue();
5419
5420   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5421   SDValue N00 = N0->getOperand(0);
5422   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5423     if (!N00.getNode()->hasOneUse())
5424       return SDValue();
5425     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5426     if (!N001C || N001C->getZExtValue() != 0xFF)
5427       return SDValue();
5428     N00 = N00.getOperand(0);
5429     LookPassAnd0 = true;
5430   }
5431
5432   SDValue N10 = N1->getOperand(0);
5433   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5434     if (!N10.getNode()->hasOneUse())
5435       return SDValue();
5436     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5437     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5438     // for X86.
5439     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5440                    N101C->getZExtValue() != 0xFFFF))
5441       return SDValue();
5442     N10 = N10.getOperand(0);
5443     LookPassAnd1 = true;
5444   }
5445
5446   if (N00 != N10)
5447     return SDValue();
5448
5449   // Make sure everything beyond the low halfword gets set to zero since the SRL
5450   // 16 will clear the top bits.
5451   unsigned OpSizeInBits = VT.getSizeInBits();
5452   if (DemandHighBits && OpSizeInBits > 16) {
5453     // If the left-shift isn't masked out then the only way this is a bswap is
5454     // if all bits beyond the low 8 are 0. In that case the entire pattern
5455     // reduces to a left shift anyway: leave it for other parts of the combiner.
5456     if (!LookPassAnd0)
5457       return SDValue();
5458
5459     // However, if the right shift isn't masked out then it might be because
5460     // it's not needed. See if we can spot that too.
5461     if (!LookPassAnd1 &&
5462         !DAG.MaskedValueIsZero(
5463             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5464       return SDValue();
5465   }
5466
5467   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5468   if (OpSizeInBits > 16) {
5469     SDLoc DL(N);
5470     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5471                       DAG.getConstant(OpSizeInBits - 16, DL,
5472                                       getShiftAmountTy(VT)));
5473   }
5474   return Res;
5475 }
5476
5477 /// Return true if the specified node is an element that makes up a 32-bit
5478 /// packed halfword byteswap.
5479 /// ((x & 0x000000ff) << 8) |
5480 /// ((x & 0x0000ff00) >> 8) |
5481 /// ((x & 0x00ff0000) << 8) |
5482 /// ((x & 0xff000000) >> 8)
5483 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5484   if (!N.getNode()->hasOneUse())
5485     return false;
5486
5487   unsigned Opc = N.getOpcode();
5488   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5489     return false;
5490
5491   SDValue N0 = N.getOperand(0);
5492   unsigned Opc0 = N0.getOpcode();
5493   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5494     return false;
5495
5496   ConstantSDNode *N1C = nullptr;
5497   // SHL or SRL: look upstream for AND mask operand
5498   if (Opc == ISD::AND)
5499     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5500   else if (Opc0 == ISD::AND)
5501     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5502   if (!N1C)
5503     return false;
5504
5505   unsigned MaskByteOffset;
5506   switch (N1C->getZExtValue()) {
5507   default:
5508     return false;
5509   case 0xFF:       MaskByteOffset = 0; break;
5510   case 0xFF00:     MaskByteOffset = 1; break;
5511   case 0xFFFF:
5512     // In case demanded bits didn't clear the bits that will be shifted out.
5513     // This is needed for X86.
5514     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5515       MaskByteOffset = 1;
5516       break;
5517     }
5518     return false;
5519   case 0xFF0000:   MaskByteOffset = 2; break;
5520   case 0xFF000000: MaskByteOffset = 3; break;
5521   }
5522
5523   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5524   if (Opc == ISD::AND) {
5525     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5526       // (x >> 8) & 0xff
5527       // (x >> 8) & 0xff0000
5528       if (Opc0 != ISD::SRL)
5529         return false;
5530       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5531       if (!C || C->getZExtValue() != 8)
5532         return false;
5533     } else {
5534       // (x << 8) & 0xff00
5535       // (x << 8) & 0xff000000
5536       if (Opc0 != ISD::SHL)
5537         return false;
5538       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5539       if (!C || C->getZExtValue() != 8)
5540         return false;
5541     }
5542   } else if (Opc == ISD::SHL) {
5543     // (x & 0xff) << 8
5544     // (x & 0xff0000) << 8
5545     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5546       return false;
5547     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5548     if (!C || C->getZExtValue() != 8)
5549       return false;
5550   } else { // Opc == ISD::SRL
5551     // (x & 0xff00) >> 8
5552     // (x & 0xff000000) >> 8
5553     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5554       return false;
5555     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5556     if (!C || C->getZExtValue() != 8)
5557       return false;
5558   }
5559
5560   if (Parts[MaskByteOffset])
5561     return false;
5562
5563   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5564   return true;
5565 }
5566
5567 /// Match a 32-bit packed halfword bswap. That is
5568 /// ((x & 0x000000ff) << 8) |
5569 /// ((x & 0x0000ff00) >> 8) |
5570 /// ((x & 0x00ff0000) << 8) |
5571 /// ((x & 0xff000000) >> 8)
5572 /// => (rotl (bswap x), 16)
5573 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5574   if (!LegalOperations)
5575     return SDValue();
5576
5577   EVT VT = N->getValueType(0);
5578   if (VT != MVT::i32)
5579     return SDValue();
5580   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5581     return SDValue();
5582
5583   // Look for either
5584   // (or (or (and), (and)), (or (and), (and)))
5585   // (or (or (or (and), (and)), (and)), (and))
5586   if (N0.getOpcode() != ISD::OR)
5587     return SDValue();
5588   SDValue N00 = N0.getOperand(0);
5589   SDValue N01 = N0.getOperand(1);
5590   SDNode *Parts[4] = {};
5591
5592   if (N1.getOpcode() == ISD::OR &&
5593       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5594     // (or (or (and), (and)), (or (and), (and)))
5595     if (!isBSwapHWordElement(N00, Parts))
5596       return SDValue();
5597
5598     if (!isBSwapHWordElement(N01, Parts))
5599       return SDValue();
5600     SDValue N10 = N1.getOperand(0);
5601     if (!isBSwapHWordElement(N10, Parts))
5602       return SDValue();
5603     SDValue N11 = N1.getOperand(1);
5604     if (!isBSwapHWordElement(N11, Parts))
5605       return SDValue();
5606   } else {
5607     // (or (or (or (and), (and)), (and)), (and))
5608     if (!isBSwapHWordElement(N1, Parts))
5609       return SDValue();
5610     if (!isBSwapHWordElement(N01, Parts))
5611       return SDValue();
5612     if (N00.getOpcode() != ISD::OR)
5613       return SDValue();
5614     SDValue N000 = N00.getOperand(0);
5615     if (!isBSwapHWordElement(N000, Parts))
5616       return SDValue();
5617     SDValue N001 = N00.getOperand(1);
5618     if (!isBSwapHWordElement(N001, Parts))
5619       return SDValue();
5620   }
5621
5622   // Make sure the parts are all coming from the same node.
5623   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5624     return SDValue();
5625
5626   SDLoc DL(N);
5627   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5628                               SDValue(Parts[0], 0));
5629
5630   // Result of the bswap should be rotated by 16. If it's not legal, then
5631   // do  (x << 16) | (x >> 16).
5632   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5633   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5634     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5635   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5636     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5637   return DAG.getNode(ISD::OR, DL, VT,
5638                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5639                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5640 }
5641
5642 /// This contains all DAGCombine rules which reduce two values combined by
5643 /// an Or operation to a single value \see visitANDLike().
5644 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5645   EVT VT = N1.getValueType();
5646   SDLoc DL(N);
5647
5648   // fold (or x, undef) -> -1
5649   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5650     return DAG.getAllOnesConstant(DL, VT);
5651
5652   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5653     return V;
5654
5655   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5656   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5657       // Don't increase # computations.
5658       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5659     // We can only do this xform if we know that bits from X that are set in C2
5660     // but not in C1 are already zero.  Likewise for Y.
5661     if (const ConstantSDNode *N0O1C =
5662         getAsNonOpaqueConstant(N0.getOperand(1))) {
5663       if (const ConstantSDNode *N1O1C =
5664           getAsNonOpaqueConstant(N1.getOperand(1))) {
5665         // We can only do this xform if we know that bits from X that are set in
5666         // C2 but not in C1 are already zero.  Likewise for Y.
5667         const APInt &LHSMask = N0O1C->getAPIntValue();
5668         const APInt &RHSMask = N1O1C->getAPIntValue();
5669
5670         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5671             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5672           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5673                                   N0.getOperand(0), N1.getOperand(0));
5674           return DAG.getNode(ISD::AND, DL, VT, X,
5675                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5676         }
5677       }
5678     }
5679   }
5680
5681   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5682   if (N0.getOpcode() == ISD::AND &&
5683       N1.getOpcode() == ISD::AND &&
5684       N0.getOperand(0) == N1.getOperand(0) &&
5685       // Don't increase # computations.
5686       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5687     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5688                             N0.getOperand(1), N1.getOperand(1));
5689     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5690   }
5691
5692   return SDValue();
5693 }
5694
5695 /// OR combines for which the commuted variant will be tried as well.
5696 static SDValue visitORCommutative(
5697     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5698   EVT VT = N0.getValueType();
5699   if (N0.getOpcode() == ISD::AND) {
5700     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5701     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5702       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5703
5704     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5705     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5706       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5707   }
5708
5709   return SDValue();
5710 }
5711
5712 SDValue DAGCombiner::visitOR(SDNode *N) {
5713   SDValue N0 = N->getOperand(0);
5714   SDValue N1 = N->getOperand(1);
5715   EVT VT = N1.getValueType();
5716
5717   // x | x --> x
5718   if (N0 == N1)
5719     return N0;
5720
5721   // fold vector ops
5722   if (VT.isVector()) {
5723     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5724       return FoldedVOp;
5725
5726     // fold (or x, 0) -> x, vector edition
5727     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5728       return N1;
5729     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5730       return N0;
5731
5732     // fold (or x, -1) -> -1, vector edition
5733     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5734       // do not return N0, because undef node may exist in N0
5735       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5736     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5737       // do not return N1, because undef node may exist in N1
5738       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5739
5740     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5741     // Do this only if the resulting shuffle is legal.
5742     if (isa<ShuffleVectorSDNode>(N0) &&
5743         isa<ShuffleVectorSDNode>(N1) &&
5744         // Avoid folding a node with illegal type.
5745         TLI.isTypeLegal(VT)) {
5746       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5747       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5748       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5749       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5750       // Ensure both shuffles have a zero input.
5751       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5752         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5753         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5754         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5755         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5756         bool CanFold = true;
5757         int NumElts = VT.getVectorNumElements();
5758         SmallVector<int, 4> Mask(NumElts);
5759
5760         for (int i = 0; i != NumElts; ++i) {
5761           int M0 = SV0->getMaskElt(i);
5762           int M1 = SV1->getMaskElt(i);
5763
5764           // Determine if either index is pointing to a zero vector.
5765           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5766           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5767
5768           // If one element is zero and the otherside is undef, keep undef.
5769           // This also handles the case that both are undef.
5770           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5771             Mask[i] = -1;
5772             continue;
5773           }
5774
5775           // Make sure only one of the elements is zero.
5776           if (M0Zero == M1Zero) {
5777             CanFold = false;
5778             break;
5779           }
5780
5781           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5782
5783           // We have a zero and non-zero element. If the non-zero came from
5784           // SV0 make the index a LHS index. If it came from SV1, make it
5785           // a RHS index. We need to mod by NumElts because we don't care
5786           // which operand it came from in the original shuffles.
5787           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5788         }
5789
5790         if (CanFold) {
5791           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5792           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5793
5794           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5795           if (!LegalMask) {
5796             std::swap(NewLHS, NewRHS);
5797             ShuffleVectorSDNode::commuteMask(Mask);
5798             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5799           }
5800
5801           if (LegalMask)
5802             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5803         }
5804       }
5805     }
5806   }
5807
5808   // fold (or c1, c2) -> c1|c2
5809   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5810   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5811   if (N0C && N1C && !N1C->isOpaque())
5812     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5813   // canonicalize constant to RHS
5814   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5815      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5816     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5817   // fold (or x, 0) -> x
5818   if (isNullConstant(N1))
5819     return N0;
5820   // fold (or x, -1) -> -1
5821   if (isAllOnesConstant(N1))
5822     return N1;
5823
5824   if (SDValue NewSel = foldBinOpIntoSelect(N))
5825     return NewSel;
5826
5827   // fold (or x, c) -> c iff (x & ~c) == 0
5828   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5829     return N1;
5830
5831   if (SDValue Combined = visitORLike(N0, N1, N))
5832     return Combined;
5833
5834   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5835   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5836     return BSwap;
5837   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5838     return BSwap;
5839
5840   // reassociate or
5841   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5842     return ROR;
5843
5844   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5845   // iff (c1 & c2) != 0 or c1/c2 are undef.
5846   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5847     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5848   };
5849   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5850       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5851     if (SDValue COR = DAG.FoldConstantArithmetic(
5852             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5853       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5854       AddToWorklist(IOR.getNode());
5855       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5856     }
5857   }
5858
5859   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5860     return Combined;
5861   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5862     return Combined;
5863
5864   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5865   if (N0.getOpcode() == N1.getOpcode())
5866     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5867       return V;
5868
5869   // See if this is some rotate idiom.
5870   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5871     return SDValue(Rot, 0);
5872
5873   if (SDValue Load = MatchLoadCombine(N))
5874     return Load;
5875
5876   // Simplify the operands using demanded-bits information.
5877   if (SimplifyDemandedBits(SDValue(N, 0)))
5878     return SDValue(N, 0);
5879
5880   // If OR can be rewritten into ADD, try combines based on ADD.
5881   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
5882       DAG.haveNoCommonBitsSet(N0, N1))
5883     if (SDValue Combined = visitADDLike(N))
5884       return Combined;
5885
5886   return SDValue();
5887 }
5888
5889 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5890   if (Op.getOpcode() == ISD::AND &&
5891       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5892     Mask = Op.getOperand(1);
5893     return Op.getOperand(0);
5894   }
5895   return Op;
5896 }
5897
5898 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5899 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5900                             SDValue &Mask) {
5901   Op = stripConstantMask(DAG, Op, Mask);
5902   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5903     Shift = Op;
5904     return true;
5905   }
5906   return false;
5907 }
5908
5909 /// Helper function for visitOR to extract the needed side of a rotate idiom
5910 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5911 /// InstCombine merged some outside op with one of the shifts from
5912 /// the rotate pattern.
5913 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5914 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5915 /// patterns:
5916 ///
5917 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5918 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5919 ///
5920 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5921 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5922 ///
5923 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5924 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5925 ///
5926 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5927 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5928 ///
5929 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5930 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5931                                      SDValue ExtractFrom, SDValue &Mask,
5932                                      const SDLoc &DL) {
5933   assert(OppShift && ExtractFrom && "Empty SDValue");
5934   assert(
5935       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5936       "Existing shift must be valid as a rotate half");
5937
5938   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5939   // Preconditions:
5940   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5941   //
5942   // Find opcode of the needed shift to be extracted from (op0 v c0).
5943   unsigned Opcode = ISD::DELETED_NODE;
5944   bool IsMulOrDiv = false;
5945   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5946   // opcode or its arithmetic (mul or udiv) variant.
5947   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5948     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5949     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5950       return false;
5951     Opcode = NeededShift;
5952     return true;
5953   };
5954   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5955   // that the needed shift can be extracted from.
5956   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5957       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5958     return SDValue();
5959
5960   // op0 must be the same opcode on both sides, have the same LHS argument,
5961   // and produce the same value type.
5962   SDValue OppShiftLHS = OppShift.getOperand(0);
5963   EVT ShiftedVT = OppShiftLHS.getValueType();
5964   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5965       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5966       ShiftedVT != ExtractFrom.getValueType())
5967     return SDValue();
5968
5969   // Amount of the existing shift.
5970   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5971   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5972   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5973   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5974   ConstantSDNode *ExtractFromCst =
5975       isConstOrConstSplat(ExtractFrom.getOperand(1));
5976   // TODO: We should be able to handle non-uniform constant vectors for these values
5977   // Check that we have constant values.
5978   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5979       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5980       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5981     return SDValue();
5982
5983   // Compute the shift amount we need to extract to complete the rotate.
5984   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5985   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5986     return SDValue();
5987   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5988   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5989   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5990   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5991   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5992
5993   // Now try extract the needed shift from the ExtractFrom op and see if the
5994   // result matches up with the existing shift's LHS op.
5995   if (IsMulOrDiv) {
5996     // Op to extract from is a mul or udiv by a constant.
5997     // Check:
5998     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5999     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6000     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6001                                                  NeededShiftAmt.getZExtValue());
6002     APInt ResultAmt;
6003     APInt Rem;
6004     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6005     if (Rem != 0 || ResultAmt != OppLHSAmt)
6006       return SDValue();
6007   } else {
6008     // Op to extract from is a shift by a constant.
6009     // Check:
6010     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6011     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6012                                           ExtractFromAmt.getBitWidth()))
6013       return SDValue();
6014   }
6015
6016   // Return the expanded shift op that should allow a rotate to be formed.
6017   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6018   EVT ResVT = ExtractFrom.getValueType();
6019   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6020   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6021 }
6022
6023 // Return true if we can prove that, whenever Neg and Pos are both in the
6024 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6025 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6026 //
6027 //     (or (shift1 X, Neg), (shift2 X, Pos))
6028 //
6029 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6030 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6031 // to consider shift amounts with defined behavior.
6032 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6033                            SelectionDAG &DAG) {
6034   // If EltSize is a power of 2 then:
6035   //
6036   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6037   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6038   //
6039   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6040   // for the stronger condition:
6041   //
6042   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6043   //
6044   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6045   // we can just replace Neg with Neg' for the rest of the function.
6046   //
6047   // In other cases we check for the even stronger condition:
6048   //
6049   //     Neg == EltSize - Pos                                    [B]
6050   //
6051   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6052   // behavior if Pos == 0 (and consequently Neg == EltSize).
6053   //
6054   // We could actually use [A] whenever EltSize is a power of 2, but the
6055   // only extra cases that it would match are those uninteresting ones
6056   // where Neg and Pos are never in range at the same time.  E.g. for
6057   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6058   // as well as (sub 32, Pos), but:
6059   //
6060   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6061   //
6062   // always invokes undefined behavior for 32-bit X.
6063   //
6064   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6065   unsigned MaskLoBits = 0;
6066   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6067     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6068       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6069       unsigned Bits = Log2_64(EltSize);
6070       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6071           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6072         Neg = Neg.getOperand(0);
6073         MaskLoBits = Bits;
6074       }
6075     }
6076   }
6077
6078   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6079   if (Neg.getOpcode() != ISD::SUB)
6080     return false;
6081   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6082   if (!NegC)
6083     return false;
6084   SDValue NegOp1 = Neg.getOperand(1);
6085
6086   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6087   // Pos'.  The truncation is redundant for the purpose of the equality.
6088   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6089     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6090       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6091       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6092           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6093            MaskLoBits))
6094         Pos = Pos.getOperand(0);
6095     }
6096   }
6097
6098   // The condition we need is now:
6099   //
6100   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6101   //
6102   // If NegOp1 == Pos then we need:
6103   //
6104   //              EltSize & Mask == NegC & Mask
6105   //
6106   // (because "x & Mask" is a truncation and distributes through subtraction).
6107   APInt Width;
6108   if (Pos == NegOp1)
6109     Width = NegC->getAPIntValue();
6110
6111   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6112   // Then the condition we want to prove becomes:
6113   //
6114   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6115   //
6116   // which, again because "x & Mask" is a truncation, becomes:
6117   //
6118   //                NegC & Mask == (EltSize - PosC) & Mask
6119   //             EltSize & Mask == (NegC + PosC) & Mask
6120   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6121     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6122       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6123     else
6124       return false;
6125   } else
6126     return false;
6127
6128   // Now we just need to check that EltSize & Mask == Width & Mask.
6129   if (MaskLoBits)
6130     // EltSize & Mask is 0 since Mask is EltSize - 1.
6131     return Width.getLoBits(MaskLoBits) == 0;
6132   return Width == EltSize;
6133 }
6134
6135 // A subroutine of MatchRotate used once we have found an OR of two opposite
6136 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6137 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6138 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6139 // Neg with outer conversions stripped away.
6140 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6141                                        SDValue Neg, SDValue InnerPos,
6142                                        SDValue InnerNeg, unsigned PosOpcode,
6143                                        unsigned NegOpcode, const SDLoc &DL) {
6144   // fold (or (shl x, (*ext y)),
6145   //          (srl x, (*ext (sub 32, y)))) ->
6146   //   (rotl x, y) or (rotr x, (sub 32, y))
6147   //
6148   // fold (or (shl x, (*ext (sub 32, y))),
6149   //          (srl x, (*ext y))) ->
6150   //   (rotr x, y) or (rotl x, (sub 32, y))
6151   EVT VT = Shifted.getValueType();
6152   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6153     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6154     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6155                        HasPos ? Pos : Neg).getNode();
6156   }
6157
6158   return nullptr;
6159 }
6160
6161 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6162 // idioms for rotate, and if the target supports rotation instructions, generate
6163 // a rot[lr].
6164 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6165   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
6166   EVT VT = LHS.getValueType();
6167   if (!TLI.isTypeLegal(VT)) return nullptr;
6168
6169   // The target must have at least one rotate flavor.
6170   bool HasROTL = hasOperation(ISD::ROTL, VT);
6171   bool HasROTR = hasOperation(ISD::ROTR, VT);
6172   if (!HasROTL && !HasROTR) return nullptr;
6173
6174   // Check for truncated rotate.
6175   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6176       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6177     assert(LHS.getValueType() == RHS.getValueType());
6178     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6179       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
6180                          SDValue(Rot, 0)).getNode();
6181     }
6182   }
6183
6184   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6185   SDValue LHSShift;   // The shift.
6186   SDValue LHSMask;    // AND value if any.
6187   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6188
6189   SDValue RHSShift;   // The shift.
6190   SDValue RHSMask;    // AND value if any.
6191   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6192
6193   // If neither side matched a rotate half, bail
6194   if (!LHSShift && !RHSShift)
6195     return nullptr;
6196
6197   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6198   // side of the rotate, so try to handle that here. In all cases we need to
6199   // pass the matched shift from the opposite side to compute the opcode and
6200   // needed shift amount to extract.  We still want to do this if both sides
6201   // matched a rotate half because one half may be a potential overshift that
6202   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6203   // single one).
6204
6205   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6206   if (LHSShift)
6207     if (SDValue NewRHSShift =
6208             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6209       RHSShift = NewRHSShift;
6210   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6211   if (RHSShift)
6212     if (SDValue NewLHSShift =
6213             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6214       LHSShift = NewLHSShift;
6215
6216   // If a side is still missing, nothing else we can do.
6217   if (!RHSShift || !LHSShift)
6218     return nullptr;
6219
6220   // At this point we've matched or extracted a shift op on each side.
6221
6222   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6223     return nullptr;   // Not shifting the same value.
6224
6225   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6226     return nullptr;   // Shifts must disagree.
6227
6228   // Canonicalize shl to left side in a shl/srl pair.
6229   if (RHSShift.getOpcode() == ISD::SHL) {
6230     std::swap(LHS, RHS);
6231     std::swap(LHSShift, RHSShift);
6232     std::swap(LHSMask, RHSMask);
6233   }
6234
6235   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6236   SDValue LHSShiftArg = LHSShift.getOperand(0);
6237   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6238   SDValue RHSShiftArg = RHSShift.getOperand(0);
6239   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6240
6241   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6242   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6243   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6244                                         ConstantSDNode *RHS) {
6245     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6246   };
6247   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6248     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6249                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6250
6251     // If there is an AND of either shifted operand, apply it to the result.
6252     if (LHSMask.getNode() || RHSMask.getNode()) {
6253       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6254       SDValue Mask = AllOnes;
6255
6256       if (LHSMask.getNode()) {
6257         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6258         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6259                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6260       }
6261       if (RHSMask.getNode()) {
6262         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6263         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6264                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6265       }
6266
6267       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6268     }
6269
6270     return Rot.getNode();
6271   }
6272
6273   // If there is a mask here, and we have a variable shift, we can't be sure
6274   // that we're masking out the right stuff.
6275   if (LHSMask.getNode() || RHSMask.getNode())
6276     return nullptr;
6277
6278   // If the shift amount is sign/zext/any-extended just peel it off.
6279   SDValue LExtOp0 = LHSShiftAmt;
6280   SDValue RExtOp0 = RHSShiftAmt;
6281   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6282        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6283        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6284        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6285       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6286        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6287        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6288        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6289     LExtOp0 = LHSShiftAmt.getOperand(0);
6290     RExtOp0 = RHSShiftAmt.getOperand(0);
6291   }
6292
6293   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6294                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6295   if (TryL)
6296     return TryL;
6297
6298   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6299                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6300   if (TryR)
6301     return TryR;
6302
6303   return nullptr;
6304 }
6305
6306 namespace {
6307
6308 /// Represents known origin of an individual byte in load combine pattern. The
6309 /// value of the byte is either constant zero or comes from memory.
6310 struct ByteProvider {
6311   // For constant zero providers Load is set to nullptr. For memory providers
6312   // Load represents the node which loads the byte from memory.
6313   // ByteOffset is the offset of the byte in the value produced by the load.
6314   LoadSDNode *Load = nullptr;
6315   unsigned ByteOffset = 0;
6316
6317   ByteProvider() = default;
6318
6319   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6320     return ByteProvider(Load, ByteOffset);
6321   }
6322
6323   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6324
6325   bool isConstantZero() const { return !Load; }
6326   bool isMemory() const { return Load; }
6327
6328   bool operator==(const ByteProvider &Other) const {
6329     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6330   }
6331
6332 private:
6333   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6334       : Load(Load), ByteOffset(ByteOffset) {}
6335 };
6336
6337 } // end anonymous namespace
6338
6339 /// Recursively traverses the expression calculating the origin of the requested
6340 /// byte of the given value. Returns None if the provider can't be calculated.
6341 ///
6342 /// For all the values except the root of the expression verifies that the value
6343 /// has exactly one use and if it's not true return None. This way if the origin
6344 /// of the byte is returned it's guaranteed that the values which contribute to
6345 /// the byte are not used outside of this expression.
6346 ///
6347 /// Because the parts of the expression are not allowed to have more than one
6348 /// use this function iterates over trees, not DAGs. So it never visits the same
6349 /// node more than once.
6350 static const Optional<ByteProvider>
6351 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6352                       bool Root = false) {
6353   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6354   if (Depth == 10)
6355     return None;
6356
6357   if (!Root && !Op.hasOneUse())
6358     return None;
6359
6360   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6361   unsigned BitWidth = Op.getValueSizeInBits();
6362   if (BitWidth % 8 != 0)
6363     return None;
6364   unsigned ByteWidth = BitWidth / 8;
6365   assert(Index < ByteWidth && "invalid index requested");
6366   (void) ByteWidth;
6367
6368   switch (Op.getOpcode()) {
6369   case ISD::OR: {
6370     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6371     if (!LHS)
6372       return None;
6373     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6374     if (!RHS)
6375       return None;
6376
6377     if (LHS->isConstantZero())
6378       return RHS;
6379     if (RHS->isConstantZero())
6380       return LHS;
6381     return None;
6382   }
6383   case ISD::SHL: {
6384     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6385     if (!ShiftOp)
6386       return None;
6387
6388     uint64_t BitShift = ShiftOp->getZExtValue();
6389     if (BitShift % 8 != 0)
6390       return None;
6391     uint64_t ByteShift = BitShift / 8;
6392
6393     return Index < ByteShift
6394                ? ByteProvider::getConstantZero()
6395                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6396                                        Depth + 1);
6397   }
6398   case ISD::ANY_EXTEND:
6399   case ISD::SIGN_EXTEND:
6400   case ISD::ZERO_EXTEND: {
6401     SDValue NarrowOp = Op->getOperand(0);
6402     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6403     if (NarrowBitWidth % 8 != 0)
6404       return None;
6405     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6406
6407     if (Index >= NarrowByteWidth)
6408       return Op.getOpcode() == ISD::ZERO_EXTEND
6409                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6410                  : None;
6411     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6412   }
6413   case ISD::BSWAP:
6414     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6415                                  Depth + 1);
6416   case ISD::LOAD: {
6417     auto L = cast<LoadSDNode>(Op.getNode());
6418     if (L->isVolatile() || L->isIndexed())
6419       return None;
6420
6421     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6422     if (NarrowBitWidth % 8 != 0)
6423       return None;
6424     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6425
6426     if (Index >= NarrowByteWidth)
6427       return L->getExtensionType() == ISD::ZEXTLOAD
6428                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6429                  : None;
6430     return ByteProvider::getMemory(L, Index);
6431   }
6432   }
6433
6434   return None;
6435 }
6436
// Position of byte i of a BW-byte value under little endian ordering.  The
// position is i itself; BW is accepted only so that the signature mirrors
// BigEndianByteAt.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW;
  const unsigned Position = i;
  return Position;
}
6440
// Position of byte i of a BW-byte value under big endian ordering, i.e. the
// index counted from the opposite end of the value.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned LastByte = BW - 1;
  return LastByte - i;
}
6444
6445 // Check if the bytes offsets we are looking at match with either big or
6446 // little endian value loaded. Return true for big endian, false for little
6447 // endian, and None if match failed.
6448 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6449                                   int64_t FirstOffset) {
6450   // The endian can be decided only when it is 2 bytes at least.
6451   unsigned Width = ByteOffsets.size();
6452   if (Width < 2)
6453     return None;
6454
6455   bool BigEndian = true, LittleEndian = true;
6456   for (unsigned i = 0; i < Width; i++) {
6457     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6458     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6459     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6460     if (!BigEndian && !LittleEndian)
6461       return None;
6462   }
6463
6464   assert((BigEndian != LittleEndian) && "It should be either big endian or"
6465                                         "little endian");
6466   return BigEndian;
6467 }
6468
6469 static SDValue stripTruncAndExt(SDValue Value) {
6470   switch (Value.getOpcode()) {
6471   case ISD::TRUNCATE:
6472   case ISD::ZERO_EXTEND:
6473   case ISD::SIGN_EXTEND:
6474   case ISD::ANY_EXTEND:
6475     return stripTruncAndExt(Value.getOperand(0));
6476   }
6477   return Value;
6478 }
6479
6480 /// Match a pattern where a wide type scalar value is stored by several narrow
6481 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6482 /// supports it.
6483 ///
6484 /// Assuming little endian target:
6485 ///  i8 *p = ...
6486 ///  i32 val = ...
6487 ///  p[0] = (val >> 0) & 0xFF;
6488 ///  p[1] = (val >> 8) & 0xFF;
6489 ///  p[2] = (val >> 16) & 0xFF;
6490 ///  p[3] = (val >> 24) & 0xFF;
6491 /// =>
6492 ///  *((i32)p) = val;
6493 ///
6494 ///  i8 *p = ...
6495 ///  i32 val = ...
6496 ///  p[0] = (val >> 24) & 0xFF;
6497 ///  p[1] = (val >> 16) & 0xFF;
6498 ///  p[2] = (val >> 8) & 0xFF;
6499 ///  p[3] = (val >> 0) & 0xFF;
6500 /// =>
6501 ///  *((i32)p) = BSWAP(val);
6502 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6503   // Collect all the stores in the chain.
6504   SDValue Chain;
6505   SmallVector<StoreSDNode *, 8> Stores;
6506   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6507     if (Store->getMemoryVT() != MVT::i8 ||
6508         Store->isVolatile() || Store->isIndexed())
6509       return SDValue();
6510     Stores.push_back(Store);
6511     Chain = Store->getChain();
6512   }
6513   // Handle the simple type only.
6514   unsigned Width = Stores.size();
6515   EVT VT = EVT::getIntegerVT(
6516     *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6517   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6518     return SDValue();
6519
6520   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6521   if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6522     return SDValue();
6523
6524   // Check if all the bytes of the combined value we are looking at are stored
6525   // to the same base address. Collect bytes offsets from Base address into
6526   // ByteOffsets.
6527   SDValue CombinedValue;
6528   SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
6529   int64_t FirstOffset = INT64_MAX;
6530   StoreSDNode *FirstStore = nullptr;
6531   Optional<BaseIndexOffset> Base;
6532   for (auto Store : Stores) {
6533     // All the stores store different byte of the CombinedValue. A truncate is
6534     // required to get that byte value.
6535     SDValue Trunc = Store->getValue();
6536     if (Trunc.getOpcode() != ISD::TRUNCATE)
6537       return SDValue();
6538     // A shift operation is required to get the right byte offset, except the
6539     // first byte.
6540     int64_t Offset = 0;
6541     SDValue Value = Trunc.getOperand(0);
6542     if (Value.getOpcode() == ISD::SRL ||
6543         Value.getOpcode() == ISD::SRA) {
6544       ConstantSDNode *ShiftOffset =
6545         dyn_cast<ConstantSDNode>(Value.getOperand(1));
6546       // Trying to match the following pattern. The shift offset must be
6547       // a constant and a multiple of 8. It is the byte offset in "y".
6548       //
6549       // x = srl y, offset
6550       // i8 z = trunc x
6551       // store z, ...
6552       if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6553         return SDValue();
6554
6555      Offset = ShiftOffset->getSExtValue()/8;
6556      Value = Value.getOperand(0);
6557     }
6558
6559     // Stores must share the same combined value with different offsets.
6560     if (!CombinedValue)
6561       CombinedValue = Value;
6562     else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6563       return SDValue();
6564
6565     // The trunc and all the extend operation should be stripped to get the
6566     // real value we are stored.
6567     else if (CombinedValue.getValueType() != VT) {
6568       if (Value.getValueType() == VT ||
6569           Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6570         CombinedValue = Value;
6571       // Give up if the combined value type is smaller than the store size.
6572       if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6573         return SDValue();
6574     }
6575
6576     // Stores must share the same base address
6577     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6578     int64_t ByteOffsetFromBase = 0;
6579     if (!Base)
6580       Base = Ptr;
6581     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6582       return SDValue();
6583
6584     // Remember the first byte store
6585     if (ByteOffsetFromBase < FirstOffset) {
6586       FirstStore = Store;
6587       FirstOffset = ByteOffsetFromBase;
6588     }
6589     // Map the offset in the store and the offset in the combined value, and
6590     // early return if it has been set before.
6591     if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6592       return SDValue();
6593     ByteOffsets[Offset] = ByteOffsetFromBase;
6594   }
6595
6596   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6597   assert(FirstStore && "First store must be set");
6598
6599   // Check if the bytes of the combined value we are looking at match with
6600   // either big or little endian value store.
6601   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6602   if (!IsBigEndian.hasValue())
6603     return SDValue();
6604
6605   // The node we are looking at matches with the pattern, check if we can
6606   // replace it with a single bswap if needed and store.
6607
6608   // If the store needs byte swap check if the target supports it
6609   bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6610
6611   // Before legalize we can introduce illegal bswaps which will be later
6612   // converted to an explicit bswap sequence. This way we end up with a single
6613   // store and byte shuffling instead of several stores and byte shuffling.
6614   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6615     return SDValue();
6616
6617   // Check that a store of the wide type is both allowed and fast on the target
6618   bool Fast = false;
6619   bool Allowed =
6620       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6621                              *FirstStore->getMemOperand(), &Fast);
6622   if (!Allowed || !Fast)
6623     return SDValue();
6624
6625   if (VT != CombinedValue.getValueType()) {
6626     assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6627            "Get unexpected store value to combine");
6628     CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6629                              CombinedValue);
6630   }
6631
6632   if (NeedsBswap)
6633     CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6634
6635   SDValue NewStore =
6636     DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
6637                  FirstStore->getPointerInfo(), FirstStore->getAlignment());
6638
6639   // Rely on other DAG combine rules to remove the other individual stores.
6640   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6641   return NewStore;
6642 }
6643
6644 /// Match a pattern where a wide type scalar value is loaded by several narrow
6645 /// loads and combined by shifts and ors. Fold it into a single load or a load
6646 /// and a BSWAP if the targets supports it.
6647 ///
6648 /// Assuming little endian target:
6649 ///  i8 *a = ...
6650 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6651 /// =>
6652 ///  i32 val = *((i32)a)
6653 ///
6654 ///  i8 *a = ...
6655 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6656 /// =>
6657 ///  i32 val = BSWAP(*((i32)a))
6658 ///
6659 /// TODO: This rule matches complex patterns with OR node roots and doesn't
6660 /// interact well with the worklist mechanism. When a part of the pattern is
6661 /// updated (e.g. one of the loads) its direct users are put into the worklist,
6662 /// but the root node of the pattern which triggers the load combine is not
6663 /// necessarily a direct user of the changed node. For example, once the address
6664 /// of t28 load is reassociated load combine won't be triggered:
6665 ///             t25: i32 = add t4, Constant:i32<2>
6666 ///           t26: i64 = sign_extend t25
6667 ///        t27: i64 = add t2, t26
6668 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6669 ///     t29: i32 = zero_extend t28
6670 ///   t32: i32 = shl t29, Constant:i8<8>
6671 /// t33: i32 = or t23, t32
6672 /// As a possible fix visitLoad can check if the load can be a part of a load
6673 /// combine pattern and add corresponding OR roots to the worklist.
6674 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6675   assert(N->getOpcode() == ISD::OR &&
6676          "Can only match load combining against OR nodes");
6677
6678   // Handles simple types only
6679   EVT VT = N->getValueType(0);
6680   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6681     return SDValue();
6682   unsigned ByteWidth = VT.getSizeInBits() / 8;
6683
6684   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6685   // Before legalize we can introduce too wide illegal loads which will be later
6686   // split into legal sized loads. This enables us to combine i64 load by i8
6687   // patterns to a couple of i32 loads on 32 bit targets.
6688   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6689     return SDValue();
6690
6691   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6692   auto MemoryByteOffset = [&] (ByteProvider P) {
6693     assert(P.isMemory() && "Must be a memory byte provider");
6694     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6695     assert(LoadBitWidth % 8 == 0 &&
6696            "can only analyze providers for individual bytes not bit");
6697     unsigned LoadByteWidth = LoadBitWidth / 8;
6698     return IsBigEndianTarget
6699             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6700             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6701   };
6702
6703   Optional<BaseIndexOffset> Base;
6704   SDValue Chain;
6705
6706   SmallPtrSet<LoadSDNode *, 8> Loads;
6707   Optional<ByteProvider> FirstByteProvider;
6708   int64_t FirstOffset = INT64_MAX;
6709
6710   // Check if all the bytes of the OR we are looking at are loaded from the same
6711   // base address. Collect bytes offsets from Base address in ByteOffsets.
6712   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6713   for (unsigned i = 0; i < ByteWidth; i++) {
6714     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6715     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6716       return SDValue();
6717
6718     LoadSDNode *L = P->Load;
6719     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6720            "Must be enforced by calculateByteProvider");
6721     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6722
6723     // All loads must share the same chain
6724     SDValue LChain = L->getChain();
6725     if (!Chain)
6726       Chain = LChain;
6727     else if (Chain != LChain)
6728       return SDValue();
6729
6730     // Loads must share the same base address
6731     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6732     int64_t ByteOffsetFromBase = 0;
6733     if (!Base)
6734       Base = Ptr;
6735     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6736       return SDValue();
6737
6738     // Calculate the offset of the current byte from the base address
6739     ByteOffsetFromBase += MemoryByteOffset(*P);
6740     ByteOffsets[i] = ByteOffsetFromBase;
6741
6742     // Remember the first byte load
6743     if (ByteOffsetFromBase < FirstOffset) {
6744       FirstByteProvider = P;
6745       FirstOffset = ByteOffsetFromBase;
6746     }
6747
6748     Loads.insert(L);
6749   }
6750   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6751          "memory, so there must be at least one load which produces the value");
6752   assert(Base && "Base address of the accessed memory location must be set");
6753   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6754
6755   // Check if the bytes of the OR we are looking at match with either big or
6756   // little endian value load
6757   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6758   if (!IsBigEndian.hasValue())
6759     return SDValue();
6760
6761   assert(FirstByteProvider && "must be set");
6762
6763   // Ensure that the first byte is loaded from zero offset of the first load.
6764   // So the combined value can be loaded from the first load address.
6765   if (MemoryByteOffset(*FirstByteProvider) != 0)
6766     return SDValue();
6767   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6768
6769   // The node we are looking at matches with the pattern, check if we can
6770   // replace it with a single load and bswap if needed.
6771
6772   // If the load needs byte swap check if the target supports it
6773   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6774
6775   // Before legalize we can introduce illegal bswaps which will be later
6776   // converted to an explicit bswap sequence. This way we end up with a single
6777   // load and byte shuffling instead of several loads and byte shuffling.
6778   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6779     return SDValue();
6780
6781   // Check that a load of the wide type is both allowed and fast on the target
6782   bool Fast = false;
6783   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6784                                         VT, *FirstLoad->getMemOperand(), &Fast);
6785   if (!Allowed || !Fast)
6786     return SDValue();
6787
6788   SDValue NewLoad =
6789       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6790                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6791
6792   // Transfer chain users from old loads to the new load.
6793   for (LoadSDNode *L : Loads)
6794     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6795
6796   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6797 }
6798
6799 // If the target has andn, bsl, or a similar bit-select instruction,
6800 // we want to unfold masked merge, with canonical pattern of:
6801 //   |        A  |  |B|
6802 //   ((x ^ y) & m) ^ y
6803 //    |  D  |
6804 // Into:
6805 //   (x & m) | (y & ~m)
6806 // If y is a constant, and the 'andn' does not work with immediates,
6807 // we unfold into a different pattern:
6808 //   ~(~x & m) & (m | y)
6809 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6810 //       the very least that breaks andnpd / andnps patterns, and because those
6811 //       patterns are simplified in IR and shouldn't be created in the DAG
6812 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6813   assert(N->getOpcode() == ISD::XOR);
6814
6815   // Don't touch 'not' (i.e. where y = -1).
6816   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6817     return SDValue();
6818
6819   EVT VT = N->getValueType(0);
6820
6821   // There are 3 commutable operators in the pattern,
6822   // so we have to deal with 8 possible variants of the basic pattern.
6823   SDValue X, Y, M;
6824   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6825     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6826       return false;
6827     SDValue Xor = And.getOperand(XorIdx);
6828     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6829       return false;
6830     SDValue Xor0 = Xor.getOperand(0);
6831     SDValue Xor1 = Xor.getOperand(1);
6832     // Don't touch 'not' (i.e. where y = -1).
6833     if (isAllOnesOrAllOnesSplat(Xor1))
6834       return false;
6835     if (Other == Xor0)
6836       std::swap(Xor0, Xor1);
6837     if (Other != Xor1)
6838       return false;
6839     X = Xor0;
6840     Y = Xor1;
6841     M = And.getOperand(XorIdx ? 0 : 1);
6842     return true;
6843   };
6844
6845   SDValue N0 = N->getOperand(0);
6846   SDValue N1 = N->getOperand(1);
6847   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6848       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6849     return SDValue();
6850
6851   // Don't do anything if the mask is constant. This should not be reachable.
6852   // InstCombine should have already unfolded this pattern, and DAGCombiner
6853   // probably shouldn't produce it, too.
6854   if (isa<ConstantSDNode>(M.getNode()))
6855     return SDValue();
6856
6857   // We can transform if the target has AndNot
6858   if (!TLI.hasAndNot(M))
6859     return SDValue();
6860
6861   SDLoc DL(N);
6862
6863   // If Y is a constant, check that 'andn' works with immediates.
6864   if (!TLI.hasAndNot(Y)) {
6865     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6866     // If not, we need to do a bit more work to make sure andn is still used.
6867     SDValue NotX = DAG.getNOT(DL, X, VT);
6868     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6869     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6870     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6871     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6872   }
6873
6874   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6875   SDValue NotM = DAG.getNOT(DL, M, VT);
6876   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6877
6878   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6879 }
6880
6881 SDValue DAGCombiner::visitXOR(SDNode *N) {
6882   SDValue N0 = N->getOperand(0);
6883   SDValue N1 = N->getOperand(1);
6884   EVT VT = N0.getValueType();
6885
6886   // fold vector ops
6887   if (VT.isVector()) {
6888     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6889       return FoldedVOp;
6890
6891     // fold (xor x, 0) -> x, vector edition
6892     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6893       return N1;
6894     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6895       return N0;
6896   }
6897
6898   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6899   SDLoc DL(N);
6900   if (N0.isUndef() && N1.isUndef())
6901     return DAG.getConstant(0, DL, VT);
6902   // fold (xor x, undef) -> undef
6903   if (N0.isUndef())
6904     return N0;
6905   if (N1.isUndef())
6906     return N1;
6907   // fold (xor c1, c2) -> c1^c2
6908   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6909   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6910   if (N0C && N1C)
6911     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6912   // canonicalize constant to RHS
6913   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6914      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6915     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6916   // fold (xor x, 0) -> x
6917   if (isNullConstant(N1))
6918     return N0;
6919
6920   if (SDValue NewSel = foldBinOpIntoSelect(N))
6921     return NewSel;
6922
6923   // reassociate xor
6924   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6925     return RXOR;
6926
6927   // fold !(x cc y) -> (x !cc y)
6928   unsigned N0Opcode = N0.getOpcode();
6929   SDValue LHS, RHS, CC;
6930   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6931     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6932                                                LHS.getValueType().isInteger());
6933     if (!LegalOperations ||
6934         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6935       switch (N0Opcode) {
6936       default:
6937         llvm_unreachable("Unhandled SetCC Equivalent!");
6938       case ISD::SETCC:
6939         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6940       case ISD::SELECT_CC:
6941         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6942                                N0.getOperand(3), NotCC);
6943       }
6944     }
6945   }
6946
6947   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6948   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6949       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6950     SDValue V = N0.getOperand(0);
6951     SDLoc DL0(N0);
6952     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6953                     DAG.getConstant(1, DL0, V.getValueType()));
6954     AddToWorklist(V.getNode());
6955     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6956   }
6957
6958   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6959   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6960       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6961     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6962     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6963       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6964       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6965       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6966       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6967       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6968     }
6969   }
6970   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6971   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6972       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6973     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6974     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6975       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6976       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6977       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6978       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6979       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6980     }
6981   }
6982
6983   // fold (not (neg x)) -> (add X, -1)
6984   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
6985   // Y is a constant or the subtract has a single use.
6986   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
6987       isNullConstant(N0.getOperand(0))) {
6988     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
6989                        DAG.getAllOnesConstant(DL, VT));
6990   }
6991
6992   // fold (xor (and x, y), y) -> (and (not x), y)
6993   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6994     SDValue X = N0.getOperand(0);
6995     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6996     AddToWorklist(NotX.getNode());
6997     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6998   }
6999
7000   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7001     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7002     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7003     unsigned BitWidth = VT.getScalarSizeInBits();
7004     if (XorC && ShiftC) {
7005       // Don't crash on an oversized shift. We can not guarantee that a bogus
7006       // shift has been simplified to undef.
7007       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7008       if (ShiftAmt < BitWidth) {
7009         APInt Ones = APInt::getAllOnesValue(BitWidth);
7010         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7011         if (XorC->getAPIntValue() == Ones) {
7012           // If the xor constant is a shifted -1, do a 'not' before the shift:
7013           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7014           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7015           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7016           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7017         }
7018       }
7019     }
7020   }
7021
7022   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7023   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7024     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7025     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7026     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7027       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7028       SDValue S0 = S.getOperand(0);
7029       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7030         unsigned OpSizeInBits = VT.getScalarSizeInBits();
7031         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7032           if (C->getAPIntValue() == (OpSizeInBits - 1))
7033             return DAG.getNode(ISD::ABS, DL, VT, S0);
7034       }
7035     }
7036   }
7037
7038   // fold (xor x, x) -> 0
7039   if (N0 == N1)
7040     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7041
7042   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7043   // Here is a concrete example of this equivalence:
7044   // i16   x ==  14
7045   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7046   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7047   //
7048   // =>
7049   //
7050   // i16     ~1      == 0b1111111111111110
7051   // i16 rol(~1, 14) == 0b1011111111111111
7052   //
7053   // Some additional tips to help conceptualize this transform:
7054   // - Try to see the operation as placing a single zero in a value of all ones.
7055   // - There exists no value for x which would allow the result to contain zero.
7056   // - Values of x larger than the bitwidth are undefined and do not require a
7057   //   consistent result.
7058   // - Pushing the zero left requires shifting one bits in from the right.
7059   // A rotate left of ~1 is a nice way of achieving the desired result.
7060   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7061       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7062     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7063                        N0.getOperand(1));
7064   }
7065
7066   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7067   if (N0Opcode == N1.getOpcode())
7068     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7069       return V;
7070
7071   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7072   if (SDValue MM = unfoldMaskedMerge(N))
7073     return MM;
7074
7075   // Simplify the expression using non-local knowledge.
7076   if (SimplifyDemandedBits(SDValue(N, 0)))
7077     return SDValue(N, 0);
7078
7079   return SDValue();
7080 }
7081
7082 /// Handle transforms common to the three shifts, when the shift amount is a
7083 /// constant.
7084 /// We are looking for: (shift being one of shl/sra/srl)
7085 ///   shift (binop X, C0), C1
7086 /// And want to transform into:
7087 ///   binop (shift X, C1), (shift C0, C1)
7088 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
7089   // Do not turn a 'not' into a regular xor.
7090   if (isBitwiseNot(N->getOperand(0)))
7091     return SDValue();
7092
7093   // The inner binop must be one-use, since we want to replace it.
7094   SDNode *LHS = N->getOperand(0).getNode();
7095   if (!LHS->hasOneUse()) return SDValue();
7096
7097   // We want to pull some binops through shifts, so that we have (and (shift))
7098   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
7099   // thing happens with address calculations, so it's important to canonicalize
7100   // it.
7101   switch (LHS->getOpcode()) {
7102   default:
7103     return SDValue();
7104   case ISD::OR:
7105   case ISD::XOR:
7106   case ISD::AND:
7107     break;
7108   case ISD::ADD:
7109     if (N->getOpcode() != ISD::SHL)
7110       return SDValue(); // only shl(add) not sr[al](add).
7111     break;
7112   }
7113
7114   // We require the RHS of the binop to be a constant and not opaque as well.
7115   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
7116   if (!BinOpCst)
7117     return SDValue();
7118
7119   // FIXME: disable this unless the input to the binop is a shift by a constant
7120   // or is copy/select. Enable this in other cases when figure out it's exactly
7121   // profitable.
7122   SDValue BinOpLHSVal = LHS->getOperand(0);
7123   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7124                             BinOpLHSVal.getOpcode() == ISD::SRA ||
7125                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
7126                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7127   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7128                         BinOpLHSVal.getOpcode() == ISD::SELECT;
7129
7130   if (!IsShiftByConstant && !IsCopyOrSelect)
7131     return SDValue();
7132
7133   if (IsCopyOrSelect && N->hasOneUse())
7134     return SDValue();
7135
7136   EVT VT = N->getValueType(0);
7137
7138   if (!TLI.isDesirableToCommuteWithShift(N, Level))
7139     return SDValue();
7140
7141   // Fold the constants, shifting the binop RHS by the shift amount.
7142   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
7143                                N->getValueType(0),
7144                                LHS->getOperand(1), N->getOperand(1));
7145   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7146
7147   // Create the new shift.
7148   SDValue NewShift = DAG.getNode(N->getOpcode(),
7149                                  SDLoc(LHS->getOperand(0)),
7150                                  VT, LHS->getOperand(0), N->getOperand(1));
7151
7152   // Create the new binop.
7153   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
7154 }
7155
7156 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7157   assert(N->getOpcode() == ISD::TRUNCATE);
7158   assert(N->getOperand(0).getOpcode() == ISD::AND);
7159
7160   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7161   EVT TruncVT = N->getValueType(0);
7162   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7163       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7164     SDValue N01 = N->getOperand(0).getOperand(1);
7165     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7166       SDLoc DL(N);
7167       SDValue N00 = N->getOperand(0).getOperand(0);
7168       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7169       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7170       AddToWorklist(Trunc00.getNode());
7171       AddToWorklist(Trunc01.getNode());
7172       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7173     }
7174   }
7175
7176   return SDValue();
7177 }
7178
7179 SDValue DAGCombiner::visitRotate(SDNode *N) {
7180   SDLoc dl(N);
7181   SDValue N0 = N->getOperand(0);
7182   SDValue N1 = N->getOperand(1);
7183   EVT VT = N->getValueType(0);
7184   unsigned Bitsize = VT.getScalarSizeInBits();
7185
7186   // fold (rot x, 0) -> x
7187   if (isNullOrNullSplat(N1))
7188     return N0;
7189
7190   // fold (rot x, c) -> x iff (c % BitSize) == 0
7191   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7192     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7193     if (DAG.MaskedValueIsZero(N1, ModuloMask))
7194       return N0;
7195   }
7196
7197   // fold (rot x, c) -> (rot x, c % BitSize)
7198   // TODO - support non-uniform vector amounts.
7199   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
7200     if (Cst->getAPIntValue().uge(Bitsize)) {
7201       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
7202       return DAG.getNode(N->getOpcode(), dl, VT, N0,
7203                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
7204     }
7205   }
7206
7207   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7208   if (N1.getOpcode() == ISD::TRUNCATE &&
7209       N1.getOperand(0).getOpcode() == ISD::AND) {
7210     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7211       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7212   }
7213
7214   unsigned NextOp = N0.getOpcode();
7215   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7216   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7217     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7218     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7219     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7220       EVT ShiftVT = C1->getValueType(0);
7221       bool SameSide = (N->getOpcode() == NextOp);
7222       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7223       if (SDValue CombinedShift =
7224               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
7225         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7226         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7227             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
7228             BitsizeC.getNode());
7229         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7230                            CombinedShiftNorm);
7231       }
7232     }
7233   }
7234   return SDValue();
7235 }
7236
7237 SDValue DAGCombiner::visitSHL(SDNode *N) {
7238   SDValue N0 = N->getOperand(0);
7239   SDValue N1 = N->getOperand(1);
7240   if (SDValue V = DAG.simplifyShift(N0, N1))
7241     return V;
7242
7243   EVT VT = N0.getValueType();
7244   EVT ShiftVT = N1.getValueType();
7245   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7246
7247   // fold vector ops
7248   if (VT.isVector()) {
7249     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7250       return FoldedVOp;
7251
7252     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7253     // If setcc produces all-one true value then:
7254     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7255     if (N1CV && N1CV->isConstant()) {
7256       if (N0.getOpcode() == ISD::AND) {
7257         SDValue N00 = N0->getOperand(0);
7258         SDValue N01 = N0->getOperand(1);
7259         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7260
7261         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7262             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7263                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7264           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
7265                                                      N01CV, N1CV))
7266             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7267         }
7268       }
7269     }
7270   }
7271
7272   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7273
7274   // fold (shl c1, c2) -> c1<<c2
7275   // TODO - support non-uniform vector shift amounts.
7276   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7277   if (N0C && N1C && !N1C->isOpaque())
7278     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
7279
7280   if (SDValue NewSel = foldBinOpIntoSelect(N))
7281     return NewSel;
7282
7283   // if (shl x, c) is known to be zero, return 0
7284   if (DAG.MaskedValueIsZero(SDValue(N, 0),
7285                             APInt::getAllOnesValue(OpSizeInBits)))
7286     return DAG.getConstant(0, SDLoc(N), VT);
7287
7288   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7289   if (N1.getOpcode() == ISD::TRUNCATE &&
7290       N1.getOperand(0).getOpcode() == ISD::AND) {
7291     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7292       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7293   }
7294
7295   // TODO - support non-uniform vector shift amounts.
7296   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7297     return SDValue(N, 0);
7298
7299   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7300   if (N0.getOpcode() == ISD::SHL) {
7301     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7302                                           ConstantSDNode *RHS) {
7303       APInt c1 = LHS->getAPIntValue();
7304       APInt c2 = RHS->getAPIntValue();
7305       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7306       return (c1 + c2).uge(OpSizeInBits);
7307     };
7308     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7309       return DAG.getConstant(0, SDLoc(N), VT);
7310
7311     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7312                                        ConstantSDNode *RHS) {
7313       APInt c1 = LHS->getAPIntValue();
7314       APInt c2 = RHS->getAPIntValue();
7315       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7316       return (c1 + c2).ult(OpSizeInBits);
7317     };
7318     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7319       SDLoc DL(N);
7320       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7321       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7322     }
7323   }
7324
7325   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7326   // For this to be valid, the second form must not preserve any of the bits
7327   // that are shifted out by the inner shift in the first form.  This means
7328   // the outer shift size must be >= the number of bits added by the ext.
7329   // As a corollary, we don't care what kind of ext it is.
7330   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7331        N0.getOpcode() == ISD::ANY_EXTEND ||
7332        N0.getOpcode() == ISD::SIGN_EXTEND) &&
7333       N0.getOperand(0).getOpcode() == ISD::SHL) {
7334     SDValue N0Op0 = N0.getOperand(0);
7335     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7336     EVT InnerVT = N0Op0.getValueType();
7337     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7338
7339     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7340                                                          ConstantSDNode *RHS) {
7341       APInt c1 = LHS->getAPIntValue();
7342       APInt c2 = RHS->getAPIntValue();
7343       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7344       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7345              (c1 + c2).uge(OpSizeInBits);
7346     };
7347     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7348                                   /*AllowUndefs*/ false,
7349                                   /*AllowTypeMismatch*/ true))
7350       return DAG.getConstant(0, SDLoc(N), VT);
7351
7352     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7353                                                       ConstantSDNode *RHS) {
7354       APInt c1 = LHS->getAPIntValue();
7355       APInt c2 = RHS->getAPIntValue();
7356       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7357       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7358              (c1 + c2).ult(OpSizeInBits);
7359     };
7360     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7361                                   /*AllowUndefs*/ false,
7362                                   /*AllowTypeMismatch*/ true)) {
7363       SDLoc DL(N);
7364       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7365       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7366       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7367       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7368     }
7369   }
7370
7371   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7372   // Only fold this if the inner zext has no other uses to avoid increasing
7373   // the total number of instructions.
7374   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7375       N0.getOperand(0).getOpcode() == ISD::SRL) {
7376     SDValue N0Op0 = N0.getOperand(0);
7377     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7378
7379     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7380       APInt c1 = LHS->getAPIntValue();
7381       APInt c2 = RHS->getAPIntValue();
7382       zeroExtendToMatch(c1, c2);
7383       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7384     };
7385     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7386                                   /*AllowUndefs*/ false,
7387                                   /*AllowTypeMismatch*/ true)) {
7388       SDLoc DL(N);
7389       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7390       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7391       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7392       AddToWorklist(NewSHL.getNode());
7393       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7394     }
7395   }
7396
7397   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
7398   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
7399   // TODO - support non-uniform vector shift amounts.
7400   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7401       N0->getFlags().hasExact()) {
7402     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7403       uint64_t C1 = N0C1->getZExtValue();
7404       uint64_t C2 = N1C->getZExtValue();
7405       SDLoc DL(N);
7406       if (C1 <= C2)
7407         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7408                            DAG.getConstant(C2 - C1, DL, ShiftVT));
7409       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7410                          DAG.getConstant(C1 - C2, DL, ShiftVT));
7411     }
7412   }
7413
7414   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
7415   //                               (and (srl x, (sub c1, c2), MASK)
7416   // Only fold this if the inner shift has no other uses -- if it does, folding
7417   // this will increase the total number of instructions.
7418   // TODO - drop hasOneUse requirement if c1 == c2?
7419   // TODO - support non-uniform vector shift amounts.
7420   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7421       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7422     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7423       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7424         uint64_t c1 = N0C1->getZExtValue();
7425         uint64_t c2 = N1C->getZExtValue();
7426         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7427         SDValue Shift;
7428         if (c2 > c1) {
7429           Mask <<= c2 - c1;
7430           SDLoc DL(N);
7431           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7432                               DAG.getConstant(c2 - c1, DL, ShiftVT));
7433         } else {
7434           Mask.lshrInPlace(c1 - c2);
7435           SDLoc DL(N);
7436           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7437                               DAG.getConstant(c1 - c2, DL, ShiftVT));
7438         }
7439         SDLoc DL(N0);
7440         return DAG.getNode(ISD::AND, DL, VT, Shift,
7441                            DAG.getConstant(Mask, DL, VT));
7442       }
7443     }
7444   }
7445
7446   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7447   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7448       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7449     SDLoc DL(N);
7450     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7451     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7452     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7453   }
7454
7455   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7456   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7457   // Variant of version done on multiply, except mul by a power of 2 is turned
7458   // into a shift.
7459   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7460       N0.getNode()->hasOneUse() &&
7461       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7462       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7463       TLI.isDesirableToCommuteWithShift(N, Level)) {
7464     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7465     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7466     AddToWorklist(Shl0.getNode());
7467     AddToWorklist(Shl1.getNode());
7468     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7469   }
7470
7471   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7472   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7473       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7474       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7475     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7476     if (isConstantOrConstantVector(Shl))
7477       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7478   }
7479
7480   if (N1C && !N1C->isOpaque())
7481     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
7482       return NewSHL;
7483
7484   return SDValue();
7485 }
7486
7487 SDValue DAGCombiner::visitSRA(SDNode *N) {
7488   SDValue N0 = N->getOperand(0);
7489   SDValue N1 = N->getOperand(1);
7490   if (SDValue V = DAG.simplifyShift(N0, N1))
7491     return V;
7492
7493   EVT VT = N0.getValueType();
7494   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7495
7496   // Arithmetic shifting an all-sign-bit value is a no-op.
7497   // fold (sra 0, x) -> 0
7498   // fold (sra -1, x) -> -1
7499   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7500     return N0;
7501
7502   // fold vector ops
7503   if (VT.isVector())
7504     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7505       return FoldedVOp;
7506
7507   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7508
7509   // fold (sra c1, c2) -> (sra c1, c2)
7510   // TODO - support non-uniform vector shift amounts.
7511   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7512   if (N0C && N1C && !N1C->isOpaque())
7513     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7514
7515   if (SDValue NewSel = foldBinOpIntoSelect(N))
7516     return NewSel;
7517
7518   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
7519   // sext_inreg.
7520   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7521     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7522     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7523     if (VT.isVector())
7524       ExtVT = EVT::getVectorVT(*DAG.getContext(),
7525                                ExtVT, VT.getVectorNumElements());
7526     if ((!LegalOperations ||
7527          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7528       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7529                          N0.getOperand(0), DAG.getValueType(ExtVT));
7530   }
7531
7532   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7533   // clamp (add c1, c2) to max shift.
7534   if (N0.getOpcode() == ISD::SRA) {
7535     SDLoc DL(N);
7536     EVT ShiftVT = N1.getValueType();
7537     EVT ShiftSVT = ShiftVT.getScalarType();
7538     SmallVector<SDValue, 16> ShiftValues;
7539
7540     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7541       APInt c1 = LHS->getAPIntValue();
7542       APInt c2 = RHS->getAPIntValue();
7543       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7544       APInt Sum = c1 + c2;
7545       unsigned ShiftSum =
7546           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7547       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7548       return true;
7549     };
7550     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7551       SDValue ShiftValue;
7552       if (VT.isVector())
7553         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7554       else
7555         ShiftValue = ShiftValues[0];
7556       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7557     }
7558   }
7559
7560   // fold (sra (shl X, m), (sub result_size, n))
7561   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7562   // result_size - n != m.
7563   // If truncate is free for the target sext(shl) is likely to result in better
7564   // code.
7565   if (N0.getOpcode() == ISD::SHL && N1C) {
7566     // Get the two constanst of the shifts, CN0 = m, CN = n.
7567     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7568     if (N01C) {
7569       LLVMContext &Ctx = *DAG.getContext();
7570       // Determine what the truncate's result bitsize and type would be.
7571       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7572
7573       if (VT.isVector())
7574         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7575
7576       // Determine the residual right-shift amount.
7577       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7578
7579       // If the shift is not a no-op (in which case this should be just a sign
7580       // extend already), the truncated to type is legal, sign_extend is legal
7581       // on that type, and the truncate to that type is both legal and free,
7582       // perform the transform.
7583       if ((ShiftAmt > 0) &&
7584           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7585           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7586           TLI.isTruncateFree(VT, TruncVT)) {
7587         SDLoc DL(N);
7588         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7589             getShiftAmountTy(N0.getOperand(0).getValueType()));
7590         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7591                                     N0.getOperand(0), Amt);
7592         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7593                                     Shift);
7594         return DAG.getNode(ISD::SIGN_EXTEND, DL,
7595                            N->getValueType(0), Trunc);
7596       }
7597     }
7598   }
7599
7600   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7601   if (N1.getOpcode() == ISD::TRUNCATE &&
7602       N1.getOperand(0).getOpcode() == ISD::AND) {
7603     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7604       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7605   }
7606
7607   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
7608   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7609   //      if c1 is equal to the number of bits the trunc removes
7610   // TODO - support non-uniform vector shift amounts.
7611   if (N0.getOpcode() == ISD::TRUNCATE &&
7612       (N0.getOperand(0).getOpcode() == ISD::SRL ||
7613        N0.getOperand(0).getOpcode() == ISD::SRA) &&
7614       N0.getOperand(0).hasOneUse() &&
7615       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
7616     SDValue N0Op0 = N0.getOperand(0);
7617     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7618       EVT LargeVT = N0Op0.getValueType();
7619       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
7620       if (LargeShift->getAPIntValue() == TruncBits) {
7621         SDLoc DL(N);
7622         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
7623                                       getShiftAmountTy(LargeVT));
7624         SDValue SRA =
7625             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
7626         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7627       }
7628     }
7629   }
7630
7631   // Simplify, based on bits shifted out of the LHS.
7632   // TODO - support non-uniform vector shift amounts.
7633   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7634     return SDValue(N, 0);
7635
7636   // If the sign bit is known to be zero, switch this to a SRL.
7637   if (DAG.SignBitIsZero(N0))
7638     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7639
7640   if (N1C && !N1C->isOpaque())
7641     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
7642       return NewSRA;
7643
7644   return SDValue();
7645 }
7646
7647 SDValue DAGCombiner::visitSRL(SDNode *N) {
7648   SDValue N0 = N->getOperand(0);
7649   SDValue N1 = N->getOperand(1);
7650   if (SDValue V = DAG.simplifyShift(N0, N1))
7651     return V;
7652
7653   EVT VT = N0.getValueType();
7654   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7655
7656   // fold vector ops
7657   if (VT.isVector())
7658     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7659       return FoldedVOp;
7660
7661   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7662
7663   // fold (srl c1, c2) -> c1 >>u c2
7664   // TODO - support non-uniform vector shift amounts.
7665   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7666   if (N0C && N1C && !N1C->isOpaque())
7667     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7668
7669   if (SDValue NewSel = foldBinOpIntoSelect(N))
7670     return NewSel;
7671
7672   // if (srl x, c) is known to be zero, return 0
7673   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7674                                    APInt::getAllOnesValue(OpSizeInBits)))
7675     return DAG.getConstant(0, SDLoc(N), VT);
7676
7677   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7678   if (N0.getOpcode() == ISD::SRL) {
7679     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7680                                           ConstantSDNode *RHS) {
7681       APInt c1 = LHS->getAPIntValue();
7682       APInt c2 = RHS->getAPIntValue();
7683       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7684       return (c1 + c2).uge(OpSizeInBits);
7685     };
7686     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7687       return DAG.getConstant(0, SDLoc(N), VT);
7688
7689     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7690                                        ConstantSDNode *RHS) {
7691       APInt c1 = LHS->getAPIntValue();
7692       APInt c2 = RHS->getAPIntValue();
7693       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7694       return (c1 + c2).ult(OpSizeInBits);
7695     };
7696     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7697       SDLoc DL(N);
7698       EVT ShiftVT = N1.getValueType();
7699       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7700       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7701     }
7702   }
7703
7704   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7705   // TODO - support non-uniform vector shift amounts.
7706   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7707       N0.getOperand(0).getOpcode() == ISD::SRL) {
7708     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7709       uint64_t c1 = N001C->getZExtValue();
7710       uint64_t c2 = N1C->getZExtValue();
7711       EVT InnerShiftVT = N0.getOperand(0).getValueType();
7712       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7713       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7714       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
7715       if (c1 + OpSizeInBits == InnerShiftSize) {
7716         SDLoc DL(N0);
7717         if (c1 + c2 >= InnerShiftSize)
7718           return DAG.getConstant(0, DL, VT);
7719         return DAG.getNode(ISD::TRUNCATE, DL, VT,
7720                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7721                                        N0.getOperand(0).getOperand(0),
7722                                        DAG.getConstant(c1 + c2, DL,
7723                                                        ShiftCountVT)));
7724       }
7725     }
7726   }
7727
7728   // fold (srl (shl x, c), c) -> (and x, cst2)
7729   // TODO - (srl (shl x, c1), c2).
7730   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7731       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7732     SDLoc DL(N);
7733     SDValue Mask =
7734         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7735     AddToWorklist(Mask.getNode());
7736     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7737   }
7738
7739   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7740   // TODO - support non-uniform vector shift amounts.
7741   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7742     // Shifting in all undef bits?
7743     EVT SmallVT = N0.getOperand(0).getValueType();
7744     unsigned BitSize = SmallVT.getScalarSizeInBits();
7745     if (N1C->getAPIntValue().uge(BitSize))
7746       return DAG.getUNDEF(VT);
7747
7748     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7749       uint64_t ShiftAmt = N1C->getZExtValue();
7750       SDLoc DL0(N0);
7751       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7752                                        N0.getOperand(0),
7753                           DAG.getConstant(ShiftAmt, DL0,
7754                                           getShiftAmountTy(SmallVT)));
7755       AddToWorklist(SmallShift.getNode());
7756       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7757       SDLoc DL(N);
7758       return DAG.getNode(ISD::AND, DL, VT,
7759                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7760                          DAG.getConstant(Mask, DL, VT));
7761     }
7762   }
7763
7764   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
7765   // bit, which is unmodified by sra.
7766   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
7767     if (N0.getOpcode() == ISD::SRA)
7768       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7769   }
7770
7771   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
7772   if (N1C && N0.getOpcode() == ISD::CTLZ &&
7773       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7774     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7775
7776     // If any of the input bits are KnownOne, then the input couldn't be all
7777     // zeros, thus the result of the srl will always be zero.
7778     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7779
7780     // If all of the bits input the to ctlz node are known to be zero, then
7781     // the result of the ctlz is "32" and the result of the shift is one.
7782     APInt UnknownBits = ~Known.Zero;
7783     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7784
7785     // Otherwise, check to see if there is exactly one bit input to the ctlz.
7786     if (UnknownBits.isPowerOf2()) {
7787       // Okay, we know that only that the single bit specified by UnknownBits
7788       // could be set on input to the CTLZ node. If this bit is set, the SRL
7789       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7790       // to an SRL/XOR pair, which is likely to simplify more.
7791       unsigned ShAmt = UnknownBits.countTrailingZeros();
7792       SDValue Op = N0.getOperand(0);
7793
7794       if (ShAmt) {
7795         SDLoc DL(N0);
7796         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7797                   DAG.getConstant(ShAmt, DL,
7798                                   getShiftAmountTy(Op.getValueType())));
7799         AddToWorklist(Op.getNode());
7800       }
7801
7802       SDLoc DL(N);
7803       return DAG.getNode(ISD::XOR, DL, VT,
7804                          Op, DAG.getConstant(1, DL, VT));
7805     }
7806   }
7807
7808   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7809   if (N1.getOpcode() == ISD::TRUNCATE &&
7810       N1.getOperand(0).getOpcode() == ISD::AND) {
7811     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7812       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7813   }
7814
7815   // fold operands of srl based on knowledge that the low bits are not
7816   // demanded.
7817   // TODO - support non-uniform vector shift amounts.
7818   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7819     return SDValue(N, 0);
7820
7821   if (N1C && !N1C->isOpaque())
7822     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7823       return NewSRL;
7824
7825   // Attempt to convert a srl of a load into a narrower zero-extending load.
7826   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7827     return NarrowLoad;
7828
7829   // Here is a common situation. We want to optimize:
7830   //
7831   //   %a = ...
7832   //   %b = and i32 %a, 2
7833   //   %c = srl i32 %b, 1
7834   //   brcond i32 %c ...
7835   //
7836   // into
7837   //
7838   //   %a = ...
7839   //   %b = and %a, 2
7840   //   %c = setcc eq %b, 0
7841   //   brcond %c ...
7842   //
7843   // However when after the source operand of SRL is optimized into AND, the SRL
7844   // itself may not be optimized further. Look for it and add the BRCOND into
7845   // the worklist.
7846   if (N->hasOneUse()) {
7847     SDNode *Use = *N->use_begin();
7848     if (Use->getOpcode() == ISD::BRCOND)
7849       AddToWorklist(Use);
7850     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7851       // Also look pass the truncate.
7852       Use = *Use->use_begin();
7853       if (Use->getOpcode() == ISD::BRCOND)
7854         AddToWorklist(Use);
7855     }
7856   }
7857
7858   return SDValue();
7859 }
7860
7861 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7862   EVT VT = N->getValueType(0);
7863   SDValue N0 = N->getOperand(0);
7864   SDValue N1 = N->getOperand(1);
7865   SDValue N2 = N->getOperand(2);
7866   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7867   unsigned BitWidth = VT.getScalarSizeInBits();
7868
7869   // fold (fshl N0, N1, 0) -> N0
7870   // fold (fshr N0, N1, 0) -> N1
7871   if (isPowerOf2_32(BitWidth))
7872     if (DAG.MaskedValueIsZero(
7873             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7874       return IsFSHL ? N0 : N1;
7875
7876   auto IsUndefOrZero = [](SDValue V) {
7877     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7878   };
7879
7880   // TODO - support non-uniform vector shift amounts.
7881   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7882     EVT ShAmtTy = N2.getValueType();
7883
7884     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7885     if (Cst->getAPIntValue().uge(BitWidth)) {
7886       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7887       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7888                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7889     }
7890
7891     unsigned ShAmt = Cst->getZExtValue();
7892     if (ShAmt == 0)
7893       return IsFSHL ? N0 : N1;
7894
7895     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7896     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7897     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7898     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7899     if (IsUndefOrZero(N0))
7900       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7901                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7902                                          SDLoc(N), ShAmtTy));
7903     if (IsUndefOrZero(N1))
7904       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7905                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7906                                          SDLoc(N), ShAmtTy));
7907   }
7908
7909   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7910   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7911   // iff We know the shift amount is in range.
7912   // TODO: when is it worth doing SUB(BW, N2) as well?
7913   if (isPowerOf2_32(BitWidth)) {
7914     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7915     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7916       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7917     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7918       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7919   }
7920
7921   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7922   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7923   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7924   // is legal as well we might be better off avoiding non-constant (BW - N2).
7925   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7926   if (N0 == N1 && hasOperation(RotOpc, VT))
7927     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7928
7929   // Simplify, based on bits shifted out of N0/N1.
7930   if (SimplifyDemandedBits(SDValue(N, 0)))
7931     return SDValue(N, 0);
7932
7933   return SDValue();
7934 }
7935
7936 SDValue DAGCombiner::visitABS(SDNode *N) {
7937   SDValue N0 = N->getOperand(0);
7938   EVT VT = N->getValueType(0);
7939
7940   // fold (abs c1) -> c2
7941   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7942     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7943   // fold (abs (abs x)) -> (abs x)
7944   if (N0.getOpcode() == ISD::ABS)
7945     return N0;
7946   // fold (abs x) -> x iff not-negative
7947   if (DAG.SignBitIsZero(N0))
7948     return N0;
7949   return SDValue();
7950 }
7951
7952 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7953   SDValue N0 = N->getOperand(0);
7954   EVT VT = N->getValueType(0);
7955
7956   // fold (bswap c1) -> c2
7957   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7958     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7959   // fold (bswap (bswap x)) -> x
7960   if (N0.getOpcode() == ISD::BSWAP)
7961     return N0->getOperand(0);
7962   return SDValue();
7963 }
7964
7965 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7966   SDValue N0 = N->getOperand(0);
7967   EVT VT = N->getValueType(0);
7968
7969   // fold (bitreverse c1) -> c2
7970   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7971     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7972   // fold (bitreverse (bitreverse x)) -> x
7973   if (N0.getOpcode() == ISD::BITREVERSE)
7974     return N0.getOperand(0);
7975   return SDValue();
7976 }
7977
7978 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7979   SDValue N0 = N->getOperand(0);
7980   EVT VT = N->getValueType(0);
7981
7982   // fold (ctlz c1) -> c2
7983   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7984     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7985
7986   // If the value is known never to be zero, switch to the undef version.
7987   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7988     if (DAG.isKnownNeverZero(N0))
7989       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7990   }
7991
7992   return SDValue();
7993 }
7994
7995 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7996   SDValue N0 = N->getOperand(0);
7997   EVT VT = N->getValueType(0);
7998
7999   // fold (ctlz_zero_undef c1) -> c2
8000   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8001     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8002   return SDValue();
8003 }
8004
8005 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8006   SDValue N0 = N->getOperand(0);
8007   EVT VT = N->getValueType(0);
8008
8009   // fold (cttz c1) -> c2
8010   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8011     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8012
8013   // If the value is known never to be zero, switch to the undef version.
8014   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8015     if (DAG.isKnownNeverZero(N0))
8016       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8017   }
8018
8019   return SDValue();
8020 }
8021
8022 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8023   SDValue N0 = N->getOperand(0);
8024   EVT VT = N->getValueType(0);
8025
8026   // fold (cttz_zero_undef c1) -> c2
8027   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8028     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8029   return SDValue();
8030 }
8031
8032 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8033   SDValue N0 = N->getOperand(0);
8034   EVT VT = N->getValueType(0);
8035
8036   // fold (ctpop c1) -> c2
8037   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8038     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8039   return SDValue();
8040 }
8041
8042 // FIXME: This should be checking for no signed zeros on individual operands, as
8043 // well as no nans.
8044 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8045                                          SDValue RHS,
8046                                          const TargetLowering &TLI) {
8047   const TargetOptions &Options = DAG.getTarget().Options;
8048   EVT VT = LHS.getValueType();
8049
8050   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8051          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8052          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8053 }
8054
8055 /// Generate Min/Max node
8056 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8057                                    SDValue RHS, SDValue True, SDValue False,
8058                                    ISD::CondCode CC, const TargetLowering &TLI,
8059                                    SelectionDAG &DAG) {
8060   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8061     return SDValue();
8062
8063   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8064   switch (CC) {
8065   case ISD::SETOLT:
8066   case ISD::SETOLE:
8067   case ISD::SETLT:
8068   case ISD::SETLE:
8069   case ISD::SETULT:
8070   case ISD::SETULE: {
8071     // Since it's known never nan to get here already, either fminnum or
8072     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8073     // expanded in terms of it.
8074     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8075     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8076       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8077
8078     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8079     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8080       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8081     return SDValue();
8082   }
8083   case ISD::SETOGT:
8084   case ISD::SETOGE:
8085   case ISD::SETGT:
8086   case ISD::SETGE:
8087   case ISD::SETUGT:
8088   case ISD::SETUGE: {
8089     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8090     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8091       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8092
8093     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8094     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8095       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8096     return SDValue();
8097   }
8098   default:
8099     return SDValue();
8100   }
8101 }
8102
8103 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8104   SDValue Cond = N->getOperand(0);
8105   SDValue N1 = N->getOperand(1);
8106   SDValue N2 = N->getOperand(2);
8107   EVT VT = N->getValueType(0);
8108   EVT CondVT = Cond.getValueType();
8109   SDLoc DL(N);
8110
8111   if (!VT.isInteger())
8112     return SDValue();
8113
8114   auto *C1 = dyn_cast<ConstantSDNode>(N1);
8115   auto *C2 = dyn_cast<ConstantSDNode>(N2);
8116   if (!C1 || !C2)
8117     return SDValue();
8118
8119   // Only do this before legalization to avoid conflicting with target-specific
8120   // transforms in the other direction (create a select from a zext/sext). There
8121   // is also a target-independent combine here in DAGCombiner in the other
8122   // direction for (select Cond, -1, 0) when the condition is not i1.
8123   if (CondVT == MVT::i1 && !LegalOperations) {
8124     if (C1->isNullValue() && C2->isOne()) {
8125       // select Cond, 0, 1 --> zext (!Cond)
8126       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8127       if (VT != MVT::i1)
8128         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8129       return NotCond;
8130     }
8131     if (C1->isNullValue() && C2->isAllOnesValue()) {
8132       // select Cond, 0, -1 --> sext (!Cond)
8133       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8134       if (VT != MVT::i1)
8135         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8136       return NotCond;
8137     }
8138     if (C1->isOne() && C2->isNullValue()) {
8139       // select Cond, 1, 0 --> zext (Cond)
8140       if (VT != MVT::i1)
8141         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8142       return Cond;
8143     }
8144     if (C1->isAllOnesValue() && C2->isNullValue()) {
8145       // select Cond, -1, 0 --> sext (Cond)
8146       if (VT != MVT::i1)
8147         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8148       return Cond;
8149     }
8150
8151     // For any constants that differ by 1, we can transform the select into an
8152     // extend and add. Use a target hook because some targets may prefer to
8153     // transform in the other direction.
8154     if (TLI.convertSelectOfConstantsToMath(VT)) {
8155       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
8156         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8157         if (VT != MVT::i1)
8158           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8159         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8160       }
8161       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
8162         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8163         if (VT != MVT::i1)
8164           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8165         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8166       }
8167     }
8168
8169     return SDValue();
8170   }
8171
8172   // fold (select Cond, 0, 1) -> (xor Cond, 1)
8173   // We can't do this reliably if integer based booleans have different contents
8174   // to floating point based booleans. This is because we can't tell whether we
8175   // have an integer-based boolean or a floating-point-based boolean unless we
8176   // can find the SETCC that produced it and inspect its operands. This is
8177   // fairly easy if C is the SETCC node, but it can potentially be
8178   // undiscoverable (or not reasonably discoverable). For example, it could be
8179   // in another basic block or it could require searching a complicated
8180   // expression.
8181   if (CondVT.isInteger() &&
8182       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8183           TargetLowering::ZeroOrOneBooleanContent &&
8184       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8185           TargetLowering::ZeroOrOneBooleanContent &&
8186       C1->isNullValue() && C2->isOne()) {
8187     SDValue NotCond =
8188         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8189     if (VT.bitsEq(CondVT))
8190       return NotCond;
8191     return DAG.getZExtOrTrunc(NotCond, DL, VT);
8192   }
8193
8194   return SDValue();
8195 }
8196
8197 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8198   SDValue N0 = N->getOperand(0);
8199   SDValue N1 = N->getOperand(1);
8200   SDValue N2 = N->getOperand(2);
8201   EVT VT = N->getValueType(0);
8202   EVT VT0 = N0.getValueType();
8203   SDLoc DL(N);
8204   SDNodeFlags Flags = N->getFlags();
8205
8206   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8207     return V;
8208
8209   // fold (select X, X, Y) -> (or X, Y)
8210   // fold (select X, 1, Y) -> (or C, Y)
8211   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8212     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8213
8214   if (SDValue V = foldSelectOfConstants(N))
8215     return V;
8216
8217   // fold (select C, 0, X) -> (and (not C), X)
8218   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8219     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8220     AddToWorklist(NOTNode.getNode());
8221     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8222   }
8223   // fold (select C, X, 1) -> (or (not C), X)
8224   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8225     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8226     AddToWorklist(NOTNode.getNode());
8227     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8228   }
8229   // fold (select X, Y, X) -> (and X, Y)
8230   // fold (select X, Y, 0) -> (and X, Y)
8231   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8232     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8233
8234   // If we can fold this based on the true/false value, do so.
8235   if (SimplifySelectOps(N, N1, N2))
8236     return SDValue(N, 0); // Don't revisit N.
8237
8238   if (VT0 == MVT::i1) {
8239     // The code in this block deals with the following 2 equivalences:
8240     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8241     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8242     // The target can specify its preferred form with the
8243     // shouldNormalizeToSelectSequence() callback. However we always transform
8244     // to the right anyway if we find the inner select exists in the DAG anyway
8245     // and we always transform to the left side if we know that we can further
8246     // optimize the combination of the conditions.
8247     bool normalizeToSequence =
8248         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8249     // select (and Cond0, Cond1), X, Y
8250     //   -> select Cond0, (select Cond1, X, Y), Y
8251     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8252       SDValue Cond0 = N0->getOperand(0);
8253       SDValue Cond1 = N0->getOperand(1);
8254       SDValue InnerSelect =
8255           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8256       if (normalizeToSequence || !InnerSelect.use_empty())
8257         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8258                            InnerSelect, N2, Flags);
8259       // Cleanup on failure.
8260       if (InnerSelect.use_empty())
8261         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8262     }
8263     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8264     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8265       SDValue Cond0 = N0->getOperand(0);
8266       SDValue Cond1 = N0->getOperand(1);
8267       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8268                                         Cond1, N1, N2, Flags);
8269       if (normalizeToSequence || !InnerSelect.use_empty())
8270         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8271                            InnerSelect, Flags);
8272       // Cleanup on failure.
8273       if (InnerSelect.use_empty())
8274         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8275     }
8276
8277     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8278     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8279       SDValue N1_0 = N1->getOperand(0);
8280       SDValue N1_1 = N1->getOperand(1);
8281       SDValue N1_2 = N1->getOperand(2);
8282       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
8283         // Create the actual and node if we can generate good code for it.
8284         if (!normalizeToSequence) {
8285           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8286           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8287                              N2, Flags);
8288         }
8289         // Otherwise see if we can optimize the "and" to a better pattern.
8290         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8291           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8292                              N2, Flags);
8293         }
8294       }
8295     }
8296     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8297     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
8298       SDValue N2_0 = N2->getOperand(0);
8299       SDValue N2_1 = N2->getOperand(1);
8300       SDValue N2_2 = N2->getOperand(2);
8301       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
8302         // Create the actual or node if we can generate good code for it.
8303         if (!normalizeToSequence) {
8304           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8305           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
8306                              N2_2, Flags);
8307         }
8308         // Otherwise see if we can optimize to a better pattern.
8309         if (SDValue Combined = visitORLike(N0, N2_0, N))
8310           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8311                              N2_2, Flags);
8312       }
8313     }
8314   }
8315
8316   // select (not Cond), N1, N2 -> select Cond, N2, N1
8317   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8318     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8319     SelectOp->setFlags(Flags);
8320     return SelectOp;
8321   }
8322
8323   // Fold selects based on a setcc into other things, such as min/max/abs.
8324   if (N0.getOpcode() == ISD::SETCC) {
8325     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8326     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8327
8328     // select (fcmp lt x, y), x, y -> fminnum x, y
8329     // select (fcmp gt x, y), x, y -> fmaxnum x, y
8330     //
8331     // This is OK if we don't care what happens if either operand is a NaN.
8332     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
8333       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8334                                                 CC, TLI, DAG))
8335         return FMinMax;
8336
8337     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8338     // This is conservatively limited to pre-legal-operations to give targets
8339     // a chance to reverse the transform if they want to do that. Also, it is
8340     // unlikely that the pattern would be formed late, so it's probably not
8341     // worth going through the other checks.
8342     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
8343         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
8344         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
8345       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8346       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8347       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
8348         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8349         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8350         //
8351         // The IR equivalent of this transform would have this form:
8352         //   %a = add %x, C
8353         //   %c = icmp ugt %x, ~C
8354         //   %r = select %c, -1, %a
8355         //   =>
8356         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8357         //   %u0 = extractvalue %u, 0
8358         //   %u1 = extractvalue %u, 1
8359         //   %r = select %u1, -1, %u0
8360         SDVTList VTs = DAG.getVTList(VT, VT0);
8361         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8362         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8363       }
8364     }
8365
8366     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8367         (!LegalOperations &&
8368          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
8369       // Any flags available in a select/setcc fold will be on the setcc as they
8370       // migrated from fcmp
8371       Flags = N0.getNode()->getFlags();
8372       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8373                                        N2, N0.getOperand(2));
8374       SelectNode->setFlags(Flags);
8375       return SelectNode;
8376     }
8377
8378     return SimplifySelect(DL, N0, N1, N2);
8379   }
8380
8381   return SDValue();
8382 }
8383
8384 static
8385 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
8386   SDLoc DL(N);
8387   EVT LoVT, HiVT;
8388   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
8389
8390   // Split the inputs.
8391   SDValue Lo, Hi, LL, LH, RL, RH;
8392   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
8393   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
8394
8395   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
8396   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
8397
8398   return std::make_pair(Lo, Hi);
8399 }
8400
8401 // This function assumes all the vselect's arguments are CONCAT_VECTOR
8402 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8403 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8404   SDLoc DL(N);
8405   SDValue Cond = N->getOperand(0);
8406   SDValue LHS = N->getOperand(1);
8407   SDValue RHS = N->getOperand(2);
8408   EVT VT = N->getValueType(0);
8409   int NumElems = VT.getVectorNumElements();
8410   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8411          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8412          Cond.getOpcode() == ISD::BUILD_VECTOR);
8413
8414   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8415   // binary ones here.
8416   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8417     return SDValue();
8418
8419   // We're sure we have an even number of elements due to the
8420   // concat_vectors we have as arguments to vselect.
8421   // Skip BV elements until we find one that's not an UNDEF
8422   // After we find an UNDEF element, keep looping until we get to half the
8423   // length of the BV and see if all the non-undef nodes are the same.
8424   ConstantSDNode *BottomHalf = nullptr;
8425   for (int i = 0; i < NumElems / 2; ++i) {
8426     if (Cond->getOperand(i)->isUndef())
8427       continue;
8428
8429     if (BottomHalf == nullptr)
8430       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8431     else if (Cond->getOperand(i).getNode() != BottomHalf)
8432       return SDValue();
8433   }
8434
8435   // Do the same for the second half of the BuildVector
8436   ConstantSDNode *TopHalf = nullptr;
8437   for (int i = NumElems / 2; i < NumElems; ++i) {
8438     if (Cond->getOperand(i)->isUndef())
8439       continue;
8440
8441     if (TopHalf == nullptr)
8442       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8443     else if (Cond->getOperand(i).getNode() != TopHalf)
8444       return SDValue();
8445   }
8446
8447   assert(TopHalf && BottomHalf &&
8448          "One half of the selector was all UNDEFs and the other was all the "
8449          "same value. This should have been addressed before this function.");
8450   return DAG.getNode(
8451       ISD::CONCAT_VECTORS, DL, VT,
8452       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8453       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8454 }
8455
8456 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8457   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8458   SDValue Mask = MSC->getMask();
8459   SDValue Data = MSC->getValue();
8460   SDValue Chain = MSC->getChain();
8461   SDLoc DL(N);
8462
8463   // Zap scatters with a zero mask.
8464   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8465     return Chain;
8466
8467   if (Level >= AfterLegalizeTypes)
8468     return SDValue();
8469
8470   // If the MSCATTER data type requires splitting and the mask is provided by a
8471   // SETCC, then split both nodes and its operands before legalization. This
8472   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8473   // and enables future optimizations (e.g. min/max pattern matching on X86).
8474   if (Mask.getOpcode() != ISD::SETCC)
8475     return SDValue();
8476
8477   // Check if any splitting is required.
8478   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
8479       TargetLowering::TypeSplitVector)
8480     return SDValue();
8481   SDValue MaskLo, MaskHi;
8482   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8483
8484   EVT LoVT, HiVT;
8485   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
8486
8487   EVT MemoryVT = MSC->getMemoryVT();
8488   unsigned Alignment = MSC->getOriginalAlignment();
8489
8490   EVT LoMemVT, HiMemVT;
8491   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8492
8493   SDValue DataLo, DataHi;
8494   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8495
8496   SDValue Scale = MSC->getScale();
8497   SDValue BasePtr = MSC->getBasePtr();
8498   SDValue IndexLo, IndexHi;
8499   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
8500
8501   MachineMemOperand *MMO = DAG.getMachineFunction().
8502     getMachineMemOperand(MSC->getPointerInfo(),
8503                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
8504                           Alignment, MSC->getAAInfo(), MSC->getRanges());
8505
8506   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
8507   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
8508                                     DataLo.getValueType(), DL, OpsLo, MMO);
8509
8510   // The order of the Scatter operation after split is well defined. The "Hi"
8511   // part comes after the "Lo". So these two operations should be chained one
8512   // after another.
8513   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
8514   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
8515                               DL, OpsHi, MMO);
8516 }
8517
8518 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8519   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8520   SDValue Mask = MST->getMask();
8521   SDValue Data = MST->getValue();
8522   SDValue Chain = MST->getChain();
8523   EVT VT = Data.getValueType();
8524   SDLoc DL(N);
8525
8526   // Zap masked stores with a zero mask.
8527   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8528     return Chain;
8529
8530   if (Level >= AfterLegalizeTypes)
8531     return SDValue();
8532
8533   // If the MSTORE data type requires splitting and the mask is provided by a
8534   // SETCC, then split both nodes and its operands before legalization. This
8535   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8536   // and enables future optimizations (e.g. min/max pattern matching on X86).
8537   if (Mask.getOpcode() == ISD::SETCC) {
8538     // Check if any splitting is required.
8539     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8540         TargetLowering::TypeSplitVector)
8541       return SDValue();
8542
8543     SDValue MaskLo, MaskHi, Lo, Hi;
8544     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8545
8546     SDValue Ptr   = MST->getBasePtr();
8547
8548     EVT MemoryVT = MST->getMemoryVT();
8549     unsigned Alignment = MST->getOriginalAlignment();
8550
8551     EVT LoMemVT, HiMemVT;
8552     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8553
8554     SDValue DataLo, DataHi;
8555     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8556
8557     MachineMemOperand *MMO = DAG.getMachineFunction().
8558       getMachineMemOperand(MST->getPointerInfo(),
8559                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
8560                            Alignment, MST->getAAInfo(), MST->getRanges());
8561
8562     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
8563                             MST->isTruncatingStore(),
8564                             MST->isCompressingStore());
8565
8566     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8567                                      MST->isCompressingStore());
8568     unsigned HiOffset = LoMemVT.getStoreSize();
8569
8570     MMO = DAG.getMachineFunction().getMachineMemOperand(
8571         MST->getPointerInfo().getWithOffset(HiOffset),
8572         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
8573         MST->getAAInfo(), MST->getRanges());
8574
8575     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
8576                             MST->isTruncatingStore(),
8577                             MST->isCompressingStore());
8578
8579     AddToWorklist(Lo.getNode());
8580     AddToWorklist(Hi.getNode());
8581
8582     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8583   }
8584   return SDValue();
8585 }
8586
8587 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8588   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8589   SDValue Mask = MGT->getMask();
8590   SDLoc DL(N);
8591
8592   // Zap gathers with a zero mask.
8593   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8594     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8595
8596   if (Level >= AfterLegalizeTypes)
8597     return SDValue();
8598
8599   // If the MGATHER result requires splitting and the mask is provided by a
8600   // SETCC, then split both nodes and its operands before legalization. This
8601   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8602   // and enables future optimizations (e.g. min/max pattern matching on X86).
8603
8604   if (Mask.getOpcode() != ISD::SETCC)
8605     return SDValue();
8606
8607   EVT VT = N->getValueType(0);
8608
8609   // Check if any splitting is required.
8610   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8611       TargetLowering::TypeSplitVector)
8612     return SDValue();
8613
8614   SDValue MaskLo, MaskHi, Lo, Hi;
8615   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8616
8617   SDValue PassThru = MGT->getPassThru();
8618   SDValue PassThruLo, PassThruHi;
8619   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8620
8621   EVT LoVT, HiVT;
8622   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
8623
8624   SDValue Chain = MGT->getChain();
8625   EVT MemoryVT = MGT->getMemoryVT();
8626   unsigned Alignment = MGT->getOriginalAlignment();
8627
8628   EVT LoMemVT, HiMemVT;
8629   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8630
8631   SDValue Scale = MGT->getScale();
8632   SDValue BasePtr = MGT->getBasePtr();
8633   SDValue Index = MGT->getIndex();
8634   SDValue IndexLo, IndexHi;
8635   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
8636
8637   MachineMemOperand *MMO = DAG.getMachineFunction().
8638     getMachineMemOperand(MGT->getPointerInfo(),
8639                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8640                           Alignment, MGT->getAAInfo(), MGT->getRanges());
8641
8642   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
8643   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
8644                            MMO);
8645
8646   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
8647   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
8648                            MMO);
8649
8650   AddToWorklist(Lo.getNode());
8651   AddToWorklist(Hi.getNode());
8652
8653   // Build a factor node to remember that this load is independent of the
8654   // other one.
8655   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8656                       Hi.getValue(1));
8657
8658   // Legalized the chain result - switch anything that used the old chain to
8659   // use the new one.
8660   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
8661
8662   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8663
8664   SDValue RetOps[] = { GatherRes, Chain };
8665   return DAG.getMergeValues(RetOps, DL);
8666 }
8667
8668 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8669   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8670   SDValue Mask = MLD->getMask();
8671   SDLoc DL(N);
8672
8673   // Zap masked loads with a zero mask.
8674   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8675     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8676
8677   if (Level >= AfterLegalizeTypes)
8678     return SDValue();
8679
8680   // If the MLOAD result requires splitting and the mask is provided by a
8681   // SETCC, then split both nodes and its operands before legalization. This
8682   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8683   // and enables future optimizations (e.g. min/max pattern matching on X86).
8684   if (Mask.getOpcode() == ISD::SETCC) {
8685     EVT VT = N->getValueType(0);
8686
8687     // Check if any splitting is required.
8688     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8689         TargetLowering::TypeSplitVector)
8690       return SDValue();
8691
8692     SDValue MaskLo, MaskHi, Lo, Hi;
8693     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8694
8695     SDValue PassThru = MLD->getPassThru();
8696     SDValue PassThruLo, PassThruHi;
8697     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8698
8699     EVT LoVT, HiVT;
8700     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
8701
8702     SDValue Chain = MLD->getChain();
8703     SDValue Ptr   = MLD->getBasePtr();
8704     EVT MemoryVT = MLD->getMemoryVT();
8705     unsigned Alignment = MLD->getOriginalAlignment();
8706
8707     EVT LoMemVT, HiMemVT;
8708     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8709
8710     MachineMemOperand *MMO = DAG.getMachineFunction().
8711     getMachineMemOperand(MLD->getPointerInfo(),
8712                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8713                          Alignment, MLD->getAAInfo(), MLD->getRanges());
8714
8715     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8716                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8717
8718     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8719                                      MLD->isExpandingLoad());
8720     unsigned HiOffset = LoMemVT.getStoreSize();
8721
8722     MMO = DAG.getMachineFunction().getMachineMemOperand(
8723         MLD->getPointerInfo().getWithOffset(HiOffset),
8724         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
8725         MLD->getAAInfo(), MLD->getRanges());
8726
8727     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8728                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8729
8730     AddToWorklist(Lo.getNode());
8731     AddToWorklist(Hi.getNode());
8732
8733     // Build a factor node to remember that this load is independent of the
8734     // other one.
8735     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8736                         Hi.getValue(1));
8737
8738     // Legalized the chain result - switch anything that used the old chain to
8739     // use the new one.
8740     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8741
8742     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8743
8744     SDValue RetOps[] = { LoadRes, Chain };
8745     return DAG.getMergeValues(RetOps, DL);
8746   }
8747   return SDValue();
8748 }
8749
8750 /// A vector select of 2 constant vectors can be simplified to math/logic to
8751 /// avoid a variable select instruction and possibly avoid constant loads.
8752 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8753   SDValue Cond = N->getOperand(0);
8754   SDValue N1 = N->getOperand(1);
8755   SDValue N2 = N->getOperand(2);
8756   EVT VT = N->getValueType(0);
8757   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8758       !TLI.convertSelectOfConstantsToMath(VT) ||
8759       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8760       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8761     return SDValue();
8762
8763   // Check if we can use the condition value to increment/decrement a single
8764   // constant value. This simplifies a select to an add and removes a constant
8765   // load/materialization from the general case.
8766   bool AllAddOne = true;
8767   bool AllSubOne = true;
8768   unsigned Elts = VT.getVectorNumElements();
8769   for (unsigned i = 0; i != Elts; ++i) {
8770     SDValue N1Elt = N1.getOperand(i);
8771     SDValue N2Elt = N2.getOperand(i);
8772     if (N1Elt.isUndef() || N2Elt.isUndef())
8773       continue;
8774
8775     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8776     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8777     if (C1 != C2 + 1)
8778       AllAddOne = false;
8779     if (C1 != C2 - 1)
8780       AllSubOne = false;
8781   }
8782
8783   // Further simplifications for the extra-special cases where the constants are
8784   // all 0 or all -1 should be implemented as folds of these patterns.
8785   SDLoc DL(N);
8786   if (AllAddOne || AllSubOne) {
8787     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8788     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8789     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8790     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8791     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8792   }
8793
8794   // The general case for select-of-constants:
8795   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8796   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8797   // leave that to a machine-specific pass.
8798   return SDValue();
8799 }
8800
8801 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8802   SDValue N0 = N->getOperand(0);
8803   SDValue N1 = N->getOperand(1);
8804   SDValue N2 = N->getOperand(2);
8805   EVT VT = N->getValueType(0);
8806   SDLoc DL(N);
8807
8808   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8809     return V;
8810
8811   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8812   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8813     return DAG.getSelect(DL, VT, F, N2, N1);
8814
8815   // Canonicalize integer abs.
8816   // vselect (setg[te] X,  0),  X, -X ->
8817   // vselect (setgt    X, -1),  X, -X ->
8818   // vselect (setl[te] X,  0), -X,  X ->
8819   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8820   if (N0.getOpcode() == ISD::SETCC) {
8821     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8822     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8823     bool isAbs = false;
8824     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8825
8826     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8827          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8828         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8829       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8830     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8831              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8832       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8833
8834     if (isAbs) {
8835       EVT VT = LHS.getValueType();
8836       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8837         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8838
8839       SDValue Shift = DAG.getNode(
8840           ISD::SRA, DL, VT, LHS,
8841           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8842       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8843       AddToWorklist(Shift.getNode());
8844       AddToWorklist(Add.getNode());
8845       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8846     }
8847
8848     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8849     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8850     //
8851     // This is OK if we don't care about what happens if either operand is a
8852     // NaN.
8853     //
8854     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
8855                                                        N0.getOperand(1), TLI)) {
8856       if (SDValue FMinMax = combineMinNumMaxNum(
8857               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8858         return FMinMax;
8859     }
8860
8861     // If this select has a condition (setcc) with narrower operands than the
8862     // select, try to widen the compare to match the select width.
8863     // TODO: This should be extended to handle any constant.
8864     // TODO: This could be extended to handle non-loading patterns, but that
8865     //       requires thorough testing to avoid regressions.
8866     if (isNullOrNullSplat(RHS)) {
8867       EVT NarrowVT = LHS.getValueType();
8868       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8869       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8870       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8871       unsigned WideWidth = WideVT.getScalarSizeInBits();
8872       bool IsSigned = isSignedIntSetCC(CC);
8873       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8874       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8875           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8876           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8877           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8878         // Both compare operands can be widened for free. The LHS can use an
8879         // extended load, and the RHS is a constant:
8880         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8881         //   vselect (setcc extload(X), C'), N1, N2
8882         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8883         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8884         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8885         EVT WideSetCCVT = getSetCCResultType(WideVT);
8886         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8887         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8888       }
8889     }
8890   }
8891
8892   if (SimplifySelectOps(N, N1, N2))
8893     return SDValue(N, 0);  // Don't revisit N.
8894
8895   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8896   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8897     return N1;
8898   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8899   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8900     return N2;
8901
8902   // The ConvertSelectToConcatVector function assumes that both of the above
8903   // checks for (vselect (build_vector all{ones,zeros}) ...) have already been
8904   // made and handled.
8905   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8906       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8907       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8908     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8909       return CV;
8910   }
8911
8912   if (SDValue V = foldVSelectOfConstants(N))
8913     return V;
8914
8915   return SDValue();
8916 }
8917
8918 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8919   SDValue N0 = N->getOperand(0);
8920   SDValue N1 = N->getOperand(1);
8921   SDValue N2 = N->getOperand(2);
8922   SDValue N3 = N->getOperand(3);
8923   SDValue N4 = N->getOperand(4);
8924   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8925
8926   // fold select_cc lhs, rhs, x, x, cc -> x
8927   if (N2 == N3)
8928     return N2;
8929
8930   // Determine if the condition we're dealing with is constant
8931   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8932                                   CC, SDLoc(N), false)) {
8933     AddToWorklist(SCC.getNode());
8934
8935     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8936       if (!SCCC->isNullValue())
8937         return N2;    // cond always true -> true val
8938       else
8939         return N3;    // cond always false -> false val
8940     } else if (SCC->isUndef()) {
8941       // When the condition is UNDEF, just return the first operand. This is
8942       // coherent the DAG creation, no setcc node is created in this case
8943       return N2;
8944     } else if (SCC.getOpcode() == ISD::SETCC) {
8945       // Fold to a simpler select_cc
8946       SDValue SelectOp = DAG.getNode(
8947           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
8948           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
8949       SelectOp->setFlags(SCC->getFlags());
8950       return SelectOp;
8951     }
8952   }
8953
8954   // If we can fold this based on the true/false value, do so.
8955   if (SimplifySelectOps(N, N2, N3))
8956     return SDValue(N, 0);  // Don't revisit N.
8957
8958   // fold select_cc into other things, such as min/max/abs
8959   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8960 }
8961
8962 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8963   // setcc is very commonly used as an argument to brcond. This pattern
8964   // also lend itself to numerous combines and, as a result, it is desired
8965   // we keep the argument to a brcond as a setcc as much as possible.
8966   bool PreferSetCC =
8967       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8968
8969   SDValue Combined = SimplifySetCC(
8970       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8971       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8972
8973   if (!Combined)
8974     return SDValue();
8975
8976   // If we prefer to have a setcc, and we don't, we'll try our best to
8977   // recreate one using rebuildSetCC.
8978   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8979     SDValue NewSetCC = rebuildSetCC(Combined);
8980
8981     // We don't have anything interesting to combine to.
8982     if (NewSetCC.getNode() == N)
8983       return SDValue();
8984
8985     if (NewSetCC)
8986       return NewSetCC;
8987   }
8988
8989   return Combined;
8990 }
8991
8992 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8993   SDValue LHS = N->getOperand(0);
8994   SDValue RHS = N->getOperand(1);
8995   SDValue Carry = N->getOperand(2);
8996   SDValue Cond = N->getOperand(3);
8997
8998   // If Carry is false, fold to a regular SETCC.
8999   if (isNullConstant(Carry))
9000     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9001
9002   return SDValue();
9003 }
9004
9005 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9006 /// a build_vector of constants.
9007 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9008 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9009 /// Vector extends are not folded if operations are legal; this is to
9010 /// avoid introducing illegal build_vector dag nodes.
9011 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9012                                          SelectionDAG &DAG, bool LegalTypes) {
9013   unsigned Opcode = N->getOpcode();
9014   SDValue N0 = N->getOperand(0);
9015   EVT VT = N->getValueType(0);
9016   SDLoc DL(N);
9017
9018   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9019          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9020          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9021          && "Expected EXTEND dag node in input!");
9022
9023   // fold (sext c1) -> c1
9024   // fold (zext c1) -> c1
9025   // fold (aext c1) -> c1
9026   if (isa<ConstantSDNode>(N0))
9027     return DAG.getNode(Opcode, DL, VT, N0);
9028
9029   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9030   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9031   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9032   if (N0->getOpcode() == ISD::SELECT) {
9033     SDValue Op1 = N0->getOperand(1);
9034     SDValue Op2 = N0->getOperand(2);
9035     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9036         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9037       // For any_extend, choose sign extension of the constants to allow a
9038       // possible further transform to sign_extend_inreg.i.e.
9039       //
9040       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9041       // t2: i64 = any_extend t1
9042       // -->
9043       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9044       // -->
9045       // t4: i64 = sign_extend_inreg t3
9046       unsigned FoldOpc = Opcode;
9047       if (FoldOpc == ISD::ANY_EXTEND)
9048         FoldOpc = ISD::SIGN_EXTEND;
9049       return DAG.getSelect(DL, VT, N0->getOperand(0),
9050                            DAG.getNode(FoldOpc, DL, VT, Op1),
9051                            DAG.getNode(FoldOpc, DL, VT, Op2));
9052     }
9053   }
9054
9055   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9056   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9057   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9058   EVT SVT = VT.getScalarType();
9059   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9060       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9061     return SDValue();
9062
9063   // We can fold this node into a build_vector.
9064   unsigned VTBits = SVT.getSizeInBits();
9065   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9066   SmallVector<SDValue, 8> Elts;
9067   unsigned NumElts = VT.getVectorNumElements();
9068
9069   // For zero-extensions, UNDEF elements still guarantee to have the upper
9070   // bits set to zero.
9071   bool IsZext =
9072       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9073
9074   for (unsigned i = 0; i != NumElts; ++i) {
9075     SDValue Op = N0.getOperand(i);
9076     if (Op.isUndef()) {
9077       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9078       continue;
9079     }
9080
9081     SDLoc DL(Op);
9082     // Get the constant value and if needed trunc it to the size of the type.
9083     // Nodes like build_vector might have constants wider than the scalar type.
9084     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9085     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9086       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9087     else
9088       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9089   }
9090
9091   return DAG.getBuildVector(VT, DL, Elts);
9092 }
9093
9094 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9095 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9096 // transformation. Returns true if extension are possible and the above
9097 // mentioned transformation is profitable.
9098 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9099                                     unsigned ExtOpc,
9100                                     SmallVectorImpl<SDNode *> &ExtendNodes,
9101                                     const TargetLowering &TLI) {
9102   bool HasCopyToRegUses = false;
9103   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9104   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9105                             UE = N0.getNode()->use_end();
9106        UI != UE; ++UI) {
9107     SDNode *User = *UI;
9108     if (User == N)
9109       continue;
9110     if (UI.getUse().getResNo() != N0.getResNo())
9111       continue;
9112     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9113     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9114       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9115       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9116         // Sign bits will be lost after a zext.
9117         return false;
9118       bool Add = false;
9119       for (unsigned i = 0; i != 2; ++i) {
9120         SDValue UseOp = User->getOperand(i);
9121         if (UseOp == N0)
9122           continue;
9123         if (!isa<ConstantSDNode>(UseOp))
9124           return false;
9125         Add = true;
9126       }
9127       if (Add)
9128         ExtendNodes.push_back(User);
9129       continue;
9130     }
9131     // If truncates aren't free and there are users we can't
9132     // extend, it isn't worthwhile.
9133     if (!isTruncFree)
9134       return false;
9135     // Remember if this value is live-out.
9136     if (User->getOpcode() == ISD::CopyToReg)
9137       HasCopyToRegUses = true;
9138   }
9139
9140   if (HasCopyToRegUses) {
9141     bool BothLiveOut = false;
9142     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9143          UI != UE; ++UI) {
9144       SDUse &Use = UI.getUse();
9145       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9146         BothLiveOut = true;
9147         break;
9148       }
9149     }
9150     if (BothLiveOut)
9151       // Both unextended and extended values are live out. There had better be
9152       // a good reason for the transformation.
9153       return ExtendNodes.size();
9154   }
9155   return true;
9156 }
9157
9158 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9159                                   SDValue OrigLoad, SDValue ExtLoad,
9160                                   ISD::NodeType ExtType) {
9161   // Extend SetCC uses if necessary.
9162   SDLoc DL(ExtLoad);
9163   for (SDNode *SetCC : SetCCs) {
9164     SmallVector<SDValue, 4> Ops;
9165
9166     for (unsigned j = 0; j != 2; ++j) {
9167       SDValue SOp = SetCC->getOperand(j);
9168       if (SOp == OrigLoad)
9169         Ops.push_back(ExtLoad);
9170       else
9171         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9172     }
9173
9174     Ops.push_back(SetCC->getOperand(2));
9175     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9176   }
9177 }
9178
9179 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
     //
     // Split a sign/zero extend of a vector load into several narrower extending
     // loads that are joined with CONCAT_VECTORS. N must be a SIGN_EXTEND or
     // ZERO_EXTEND node (asserted). On success both N and the original load are
     // replaced in the DAG and SDValue(N, 0) is returned; in every other case an
     // empty SDValue is returned.
9180 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9181   SDValue N0 = N->getOperand(0);
9182   EVT DstVT = N->getValueType(0);
9183   EVT SrcVT = N0.getValueType();
9184
9185   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9186           N->getOpcode() == ISD::ZERO_EXTEND) &&
9187          "Unexpected node type (not an extend)!");
9188
9189   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9190   // For example, on a target with legal v4i32, but illegal v8i32, turn:
9191   //   (v8i32 (sext (v8i16 (load x))))
9192   // into:
9193   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
9194   //                          (v4i32 (sextload (x + 16)))))
9195   // Where uses of the original load, i.e.:
9196   //   (v8i16 (load x))
9197   // are replaced with:
9198   //   (v8i16 (truncate
9199   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
9200   //                            (v4i32 (sextload (x + 16)))))))
9201   //
9202   // This combine is only applicable to illegal, but splittable, vectors.
9203   // All legal types, and illegal non-vector types, are handled elsewhere.
9204   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9205   //
       // NOTE(review): the pointer increment used below is the store size of one
       // source piece (Stride). For the v4i16 pieces of this example that looks
       // like 8 bytes rather than the "+ 16" shown above -- confirm.
9206   if (N0->getOpcode() != ISD::LOAD)
9207     return SDValue();
9208
9209   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9210
       // Only a plain load qualifies: non-extending, unindexed, non-volatile and
       // with a single use of its value. The result must be a power-of-two
       // vector type and the target must report the extending load as desirable.
9211   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9212       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
9213       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9214     return SDValue();
9215
       // Collect the setcc users of the load that have to be widened later (see
       // ExtendUsesToFormExtLoad); give up if the other users rule this out.
9216   SmallVector<SDNode *, 4> SetCCs;
9217   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9218     return SDValue();
9219
9220   ISD::LoadExtType ExtType =
9221       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9222
9223   // Try to split the vector types to get down to legal types.
       // Source and destination types are split in lock step, each time keeping
       // the first type returned by GetSplitDestVTs, until the extending load is
       // legal or custom, or the source type has a single element left.
9224   EVT SplitSrcVT = SrcVT;
9225   EVT SplitDstVT = DstVT;
9226   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9227          SplitSrcVT.getVectorNumElements() > 1) {
9228     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9229     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9230   }
9231
       // The loop may also have stopped because it ran out of elements; in that
       // case there is no usable split.
9232   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9233     return SDValue();
9234
9235   SDLoc DL(N);
9236   const unsigned NumSplits =
9237       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
       // Byte distance between consecutive pieces: the store size of one source
       // piece.
9238   const unsigned Stride = SplitSrcVT.getStoreSize();
9239   SmallVector<SDValue, 4> Loads;
9240   SmallVector<SDValue, 4> Chains;
9241
       // Emit one extending load per piece. Every piece is chained to the
       // original load's input chain; BasePtr is advanced by Stride after each
       // load, so piece Idx reads from the original base plus Idx * Stride.
9242   SDValue BasePtr = LN0->getBasePtr();
9243   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9244     const unsigned Offset = Idx * Stride;
         // Alignment of this piece, derived from the original load's alignment
         // and the piece's byte offset.
9245     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9246
9247     SDValue SplitLoad = DAG.getExtLoad(
9248         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9249         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9250         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9251
9252     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9253                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
9254
         // Result 0 of the new load is the loaded value, result 1 its chain.
9255     Loads.push_back(SplitLoad.getValue(0));
9256     Chains.push_back(SplitLoad.getValue(1));
9257   }
9258
       // Join the chains of all pieces with a TokenFactor and concatenate the
       // loaded values into the full-width result.
9259   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9260   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9261
9262   // Simplify TF.
9263   AddToWorklist(NewChain.getNode());
9264
9265   CombineTo(N, NewValue);
9266
9267   // Replace uses of the original load (before extension)
9268   // with a truncate of the concatenated sextloaded vectors.
       // The collected setcc users are widened first; the load is then combined
       // to Trunc (for its value) and NewChain (for its chain).
9269   SDValue Trunc =
9270       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9271   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9272   CombineTo(N0.getNode(), Trunc, NewChain);
9273   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9274 }
9275
9276 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9277 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
     //
     // N must be a ZERO_EXTEND node (asserted). When the pattern matches and all
     // checks below pass, the load is re-created as a zero-extending load of N's
     // type, the shift and logic op are rebuilt on top of it, N is replaced by
     // the new logic op and SDValue(N, 0) is returned. Otherwise an empty SDValue
     // is returned.
9278 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9279   assert(N->getOpcode() == ISD::ZERO_EXTEND);
9280   EVT VT = N->getValueType(0);
9281   EVT OrigVT = N->getOperand(0).getValueType();
       // Do nothing when the target reports this zero extension as free.
9282   if (TLI.isZExtFree(OrigVT, VT))
9283     return SDValue();
9284
9285   // and/or/xor
       // The logic op needs a constant second operand and, once operations are
       // being legalized, must be legal at the wide type.
9286   SDValue N0 = N->getOperand(0);
9287   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9288         N0.getOpcode() == ISD::XOR) ||
9289       N0.getOperand(1).getOpcode() != ISD::Constant ||
9290       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9291     return SDValue();
9292
9293   // shl/shr
       // Same requirements for the shift feeding the logic op (SHL or SRL only).
9294   SDValue N1 = N0->getOperand(0);
9295   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9296       N1.getOperand(1).getOpcode() != ISD::Constant ||
9297       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9298     return SDValue();
9299
9300   // load
       // A zero-extending load from the load's memory type to VT has to be
       // legal; sign-extending and indexed loads are rejected.
9301   if (!isa<LoadSDNode>(N1.getOperand(0)))
9302     return SDValue();
9303   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9304   EVT MemVT = Load->getMemoryVT();
9305   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9306       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9307     return SDValue();
9308
9309
9310   // If the shift op is SHL, the logic op must be AND, otherwise the result
9311   // will be wrong.
9312   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9313     return SDValue();
9314
       // Both intermediate nodes must be used only by this chain of operations.
9315   if (!N0.hasOneUse() || !N1.hasOneUse())
9316     return SDValue();
9317
       // Check the remaining users of the load (other than the shift) and
       // collect the setcc users that must be widened.
9318   SmallVector<SDNode*, 4> SetCCs;
9319   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9320                                ISD::ZERO_EXTEND, SetCCs, TLI))
9321     return SDValue();
9322
9323   // Actually do the transformation.
9324   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9325                                    Load->getChain(), Load->getBasePtr(),
9326                                    Load->getMemoryVT(), Load->getMemOperand());
9327
       // Rebuild the shift at the wide type; the shift-amount operand is reused
       // unchanged.
9328   SDLoc DL1(N1);
9329   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9330                               N1.getOperand(1));
9331
       // Zero-extend the logic-op constant to the width of VT and rebuild the
       // logic op (despite the variable name it may be an AND, OR or XOR).
9332   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9333   Mask = Mask.zext(VT.getSizeInBits());
9334   SDLoc DL0(N0);
9335   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9336                             DAG.getConstant(Mask, DL0, VT));
9337
9338   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9339   CombineTo(N, And);
       // This use-count query runs after N has been replaced. With a single use
       // of the loaded value left, only users of the load's chain (result 1) are
       // redirected to the new load's chain; otherwise the load is replaced by a
       // truncate of the new load together with the new chain.
9340   if (SDValue(Load, 0).hasOneUse()) {
9341     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9342   } else {
9343     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9344                                 Load->getValueType(0), ExtLoad);
9345     CombineTo(Load, Trunc, ExtLoad.getValue(1));
9346   }
9347
9348   // N0 is dead at this point.
9349   recursivelyDeleteUnusedNodes(N0.getNode());
9350
9351   return SDValue(N,0); // Return N so it doesn't get rechecked!
9352 }
9353
9354 /// If we're narrowing or widening the result of a vector select and the final
9355 /// size is the same size as a setcc (compare) feeding the select, then try to
9356 /// apply the cast operation to the select's operands because matching vector
9357 /// sizes for a select condition and other operands should be more efficient.
9358 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9359   unsigned CastOpcode = Cast->getOpcode();
9360   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9361           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9362           CastOpcode == ISD::FP_ROUND) &&
9363          "Unexpected opcode for vector select narrowing/widening");
9364
9365   // We only do this transform before legal ops because the pattern may be
9366   // obfuscated by target-specific operations after legalization. Do not create
9367   // an illegal select op, however, because that may be difficult to lower.
9368   EVT VT = Cast->getValueType(0);
9369   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9370     return SDValue();
9371
9372   SDValue VSel = Cast->getOperand(0);
9373   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9374       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9375     return SDValue();
9376
9377   // Does the setcc have the same vector size as the casted select?
9378   SDValue SetCC = VSel.getOperand(0);
9379   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9380   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9381     return SDValue();
9382
9383   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9384   SDValue A = VSel.getOperand(1);
9385   SDValue B = VSel.getOperand(2);
9386   SDValue CastA, CastB;
9387   SDLoc DL(Cast);
9388   if (CastOpcode == ISD::FP_ROUND) {
9389     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9390     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9391     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9392   } else {
9393     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9394     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9395   }
9396   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9397 }
9398
9399 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9400 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
     //
     // N is the extend node, N0 its operand and VT the type the new load is
     // created with. ExtLoadType is the kind of extending load to create:
     // ISD::SEXTLOAD is matched against sign-extending loads, any other value
     // against zero-extending loads. On success N is replaced by a new extending
     // load, users of the old load's chain are moved to the new load, and
     // SDValue(N, 0) is returned. Otherwise an empty SDValue is returned.
9401 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9402                                      const TargetLowering &TLI, EVT VT,
9403                                      bool LegalOperations, SDNode *N,
9404                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
9405   SDNode *N0Node = N0.getNode();
9406   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9407                                                    : ISD::isZEXTLoad(N0Node);
       // N0 must be an extending load of the requested kind or one accepted by
       // ISD::isEXTLoad (the plain "extload" of the second pattern above), and it
       // must be unindexed with a single use of its value.
9408   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9409       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9410     return SDValue();
9411
9412   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9413   EVT MemVT = LN0->getMemoryVT();
       // Once operations are being legalized, and always for volatile or vector
       // loads, the new extending load has to be legal for the target.
9414   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
9415       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9416     return SDValue();
9417
       // Same chain, base pointer, memory type and memory operand as the old
       // load; only the result type and the extension kind change.
9418   SDValue ExtLoad =
9419       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9420                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9421   Combiner.CombineTo(N, ExtLoad);
       // Move users of the old load's chain (result 1) over to the new load, then
       // delete the old load if nothing uses it any more.
9422   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9423   if (LN0->use_empty())
9424     Combiner.recursivelyDeleteUnusedNodes(LN0);
9425   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9426 }
9427
9428 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9429 // Only generate vector extloads when 1) they're legal, and 2) they are
9430 // deemed desirable by the target.
     //
     // N is the extend node, N0 its operand and VT the type the new load is
     // created with. ExtLoadType is the kind of extending load to create and
     // ExtOpc the matching extend opcode used when widening other users of the
     // load. On success N is replaced by the new extending load, remaining users
     // of the old load are rewritten, and SDValue(N, 0) is returned. Otherwise an
     // empty SDValue is returned.
9431 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9432                                   const TargetLowering &TLI, EVT VT,
9433                                   bool LegalOperations, SDNode *N, SDValue N0,
9434                                   ISD::LoadExtType ExtLoadType,
9435                                   ISD::NodeType ExtOpc) {
       // N0 must be a non-extending, unindexed load. Once operations are being
       // legalized, and always for vector or volatile loads, the extending load
       // additionally has to be legal for the target.
9436   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9437       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9438       ((LegalOperations || VT.isVector() ||
9439         cast<LoadSDNode>(N0)->isVolatile()) &&
9440        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9441     return {};
9442
       // With more than one use of the loaded value, ExtendUsesToFormExtLoad
       // decides whether the transform is still worthwhile and collects the
       // setcc users to widen. Vector loads also need the target's approval.
9443   bool DoXform = true;
9444   SmallVector<SDNode *, 4> SetCCs;
9445   if (!N0.hasOneUse())
9446     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9447   if (VT.isVector())
9448     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9449   if (!DoXform)
9450     return {};
9451
9452   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9453   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9454                                    LN0->getBasePtr(), N0.getValueType(),
9455                                    LN0->getMemOperand());
9456   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9457   // If the load value is used only by N, replace it via CombineTo N.
       // The use count is sampled here, after the setcc users were rewritten but
       // before N itself is replaced.
9458   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9459   Combiner.CombineTo(N, ExtLoad);
9460   if (NoReplaceTrunc) {
         // Only the chain (result 1) of the old load still needs redirecting;
         // the old load can then be deleted.
9461     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9462     Combiner.recursivelyDeleteUnusedNodes(LN0);
9463   } else {
         // Other users of the loaded value get a truncate of the extended load.
9464     SDValue Trunc =
9465         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9466     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9467   }
9468   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9469 }
9470
9471 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9472                                        bool LegalOperations) {
9473   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9474           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9475
9476   SDValue SetCC = N->getOperand(0);
9477   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9478       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9479     return SDValue();
9480
9481   SDValue X = SetCC.getOperand(0);
9482   SDValue Ones = SetCC.getOperand(1);
9483   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9484   EVT VT = N->getValueType(0);
9485   EVT XVT = X.getValueType();
9486   // setge X, C is canonicalized to setgt, so we do not need to match that
9487   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9488   // not require the 'not' op.
9489   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9490     // Invert and smear/shift the sign bit:
9491     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9492     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9493     SDLoc DL(N);
9494     SDValue NotX = DAG.getNOT(DL, X, VT);
9495     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
9496     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9497     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9498   }
9499   return SDValue();
9500 }
9501
9502 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
9503   SDValue N0 = N->getOperand(0);
9504   EVT VT = N->getValueType(0);
9505   SDLoc DL(N);
9506
9507   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9508     return Res;
9509
9510   // fold (sext (sext x)) -> (sext x)
9511   // fold (sext (aext x)) -> (sext x)
9512   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9513     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
9514
9515   if (N0.getOpcode() == ISD::TRUNCATE) {
9516     // fold (sext (truncate (load x))) -> (sext (smaller load x))
9517     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
9518     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9519       SDNode *oye = N0.getOperand(0).getNode();
9520       if (NarrowLoad.getNode() != N0.getNode()) {
9521         CombineTo(N0.getNode(), NarrowLoad);
9522         // CombineTo deleted the truncate, if needed, but not what's under it.
9523         AddToWorklist(oye);
9524       }
9525       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9526     }
9527
9528     // See if the value being truncated is already sign extended.  If so, just
9529     // eliminate the trunc/sext pair.
9530     SDValue Op = N0.getOperand(0);
9531     unsigned OpBits   = Op.getScalarValueSizeInBits();
9532     unsigned MidBits  = N0.getScalarValueSizeInBits();
9533     unsigned DestBits = VT.getScalarSizeInBits();
9534     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
9535
9536     if (OpBits == DestBits) {
9537       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
9538       // bits, it is already ready.
9539       if (NumSignBits > DestBits-MidBits)
9540         return Op;
9541     } else if (OpBits < DestBits) {
9542       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
9543       // bits, just sext from i32.
9544       if (NumSignBits > OpBits-MidBits)
9545         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
9546     } else {
9547       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
9548       // bits, just truncate to i32.
9549       if (NumSignBits > OpBits-MidBits)
9550         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
9551     }
9552
9553     // fold (sext (truncate x)) -> (sextinreg x).
9554     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
9555                                                  N0.getValueType())) {
9556       if (OpBits < DestBits)
9557         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
9558       else if (OpBits > DestBits)
9559         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
9560       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
9561                          DAG.getValueType(N0.getValueType()));
9562     }
9563   }
9564
9565   // Try to simplify (sext (load x)).
9566   if (SDValue foldedExt =
9567           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9568                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9569     return foldedExt;
9570
9571   // fold (sext (load x)) to multiple smaller sextloads.
9572   // Only on illegal but splittable vectors.
9573   if (SDValue ExtLoad = CombineExtLoad(N))
9574     return ExtLoad;
9575
9576   // Try to simplify (sext (sextload x)).
9577   if (SDValue foldedExt = tryToFoldExtOfExtload(
9578           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9579     return foldedExt;
9580
9581   // fold (sext (and/or/xor (load x), cst)) ->
9582   //      (and/or/xor (sextload x), (sext cst))
9583   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9584        N0.getOpcode() == ISD::XOR) &&
9585       isa<LoadSDNode>(N0.getOperand(0)) &&
9586       N0.getOperand(1).getOpcode() == ISD::Constant &&
9587       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9588     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9589     EVT MemVT = LN00->getMemoryVT();
9590     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9591       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9592       SmallVector<SDNode*, 4> SetCCs;
9593       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9594                                              ISD::SIGN_EXTEND, SetCCs, TLI);
9595       if (DoXform) {
9596         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9597                                          LN00->getChain(), LN00->getBasePtr(),
9598                                          LN00->getMemoryVT(),
9599                                          LN00->getMemOperand());
9600         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9601         Mask = Mask.sext(VT.getSizeInBits());
9602         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9603                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9604         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9605         bool NoReplaceTruncAnd = !N0.hasOneUse();
9606         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9607         CombineTo(N, And);
9608         // If N0 has multiple uses, change other uses as well.
9609         if (NoReplaceTruncAnd) {
9610           SDValue TruncAnd =
9611               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9612           CombineTo(N0.getNode(), TruncAnd);
9613         }
9614         if (NoReplaceTrunc) {
9615           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9616         } else {
9617           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9618                                       LN00->getValueType(0), ExtLoad);
9619           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9620         }
9621         return SDValue(N,0); // Return N so it doesn't get rechecked!
9622       }
9623     }
9624   }
9625
9626   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9627     return V;
9628
9629   if (N0.getOpcode() == ISD::SETCC) {
9630     SDValue N00 = N0.getOperand(0);
9631     SDValue N01 = N0.getOperand(1);
9632     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9633     EVT N00VT = N0.getOperand(0).getValueType();
9634
9635     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9636     // Only do this before legalize for now.
9637     if (VT.isVector() && !LegalOperations &&
9638         TLI.getBooleanContents(N00VT) ==
9639             TargetLowering::ZeroOrNegativeOneBooleanContent) {
9640       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9641       // of the same size as the compared operands. Only optimize sext(setcc())
9642       // if this is the case.
9643       EVT SVT = getSetCCResultType(N00VT);
9644
9645       // If we already have the desired type, don't change it.
9646       if (SVT != N0.getValueType()) {
9647         // We know that the # elements of the results is the same as the
9648         // # elements of the compare (and the # elements of the compare result
9649         // for that matter).  Check to see that they are the same size.  If so,
9650         // we know that the element size of the sext'd result matches the
9651         // element size of the compare operands.
9652         if (VT.getSizeInBits() == SVT.getSizeInBits())
9653           return DAG.getSetCC(DL, VT, N00, N01, CC);
9654
9655         // If the desired elements are smaller or larger than the source
9656         // elements, we can use a matching integer vector type and then
9657         // truncate/sign extend.
9658         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9659         if (SVT == MatchingVecType) {
9660           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9661           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9662         }
9663       }
9664     }
9665
9666     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9667     // Here, T can be 1 or -1, depending on the type of the setcc and
9668     // getBooleanContents().
9669     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9670
9671     // To determine the "true" side of the select, we need to know the high bit
9672     // of the value returned by the setcc if it evaluates to true.
9673     // If the type of the setcc is i1, then the true case of the select is just
9674     // sext(i1 1), that is, -1.
9675     // If the type of the setcc is larger (say, i8) then the value of the high
9676     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9677     // of the appropriate width.
9678     SDValue ExtTrueVal = (SetCCWidth == 1)
9679                              ? DAG.getAllOnesConstant(DL, VT)
9680                              : DAG.getBoolConstant(true, DL, VT, N00VT);
9681     SDValue Zero = DAG.getConstant(0, DL, VT);
9682     if (SDValue SCC =
9683             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9684       return SCC;
9685
9686     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9687       EVT SetCCVT = getSetCCResultType(N00VT);
9688       // Don't do this transform for i1 because there's a select transform
9689       // that would reverse it.
9690       // TODO: We should not do this transform at all without a target hook
9691       // because a sext is likely cheaper than a select?
9692       if (SetCCVT.getScalarSizeInBits() != 1 &&
9693           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9694         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9695         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9696       }
9697     }
9698   }
9699
9700   // fold (sext x) -> (zext x) if the sign bit is known zero.
9701   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9702       DAG.SignBitIsZero(N0))
9703     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9704
9705   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9706     return NewVSel;
9707
9708   // Eliminate this sign extend by doing a negation in the destination type:
9709   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9710   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9711       isNullOrNullSplat(N0.getOperand(0)) &&
9712       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9713       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9714     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9715     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9716   }
9717   // Eliminate this sign extend by doing a decrement in the destination type:
9718   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9719   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9720       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9721       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9722       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9723     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9724     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9725   }
9726
9727   return SDValue();
9728 }
9729
9730 // isTruncateOf - If N is a truncate of some other value, return true, record
9731 // the value being truncated in Op and which of Op's bits are zero/one in Known.
9732 // This function computes KnownBits to avoid a duplicated call to
9733 // computeKnownBits in the caller.
9734 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9735                          KnownBits &Known) {
9736   if (N->getOpcode() == ISD::TRUNCATE) {
9737     Op = N->getOperand(0);
9738     Known = DAG.computeKnownBits(Op);
9739     return true;
9740   }
9741
9742   if (N.getOpcode() != ISD::SETCC ||
9743       N.getValueType().getScalarType() != MVT::i1 ||
9744       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9745     return false;
9746
9747   SDValue Op0 = N->getOperand(0);
9748   SDValue Op1 = N->getOperand(1);
9749   assert(Op0.getValueType() == Op1.getValueType());
9750
9751   if (isNullOrNullSplat(Op0))
9752     Op = Op1;
9753   else if (isNullOrNullSplat(Op1))
9754     Op = Op0;
9755   else
9756     return false;
9757
9758   Known = DAG.computeKnownBits(Op);
9759
9760   return (Known.Zero | 1).isAllOnesValue();
9761 }
9762
/// Try to simplify the ZERO_EXTEND node \p N.
///
/// Returns the replacement value when a fold applies, SDValue(N, 0) when the
/// DAG was already updated in place (so that N is not rechecked), and an empty
/// SDValue when nothing was changed.
9763 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9764   SDValue N0 = N->getOperand(0);
9765   EVT VT = N->getValueType(0);
9766
9767   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9768     return Res;
9769
9770   // fold (zext (zext x)) -> (zext x)
9771   // fold (zext (aext x)) -> (zext x)
9772   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9773     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9774                        N0.getOperand(0));
9775
9776   // fold (zext (truncate x)) -> (zext x) or
9777   //      (zext (truncate x)) -> (truncate x)
9778   // This is valid when the truncated bits of x are already zero.
9779   SDValue Op;
9780   KnownBits Known;
9781   if (isTruncateOf(DAG, N0, Op, Known)) {
       // TruncatedBits selects the bits of Op from the width of N0 up to the
       // smaller of Op's width and VT's width. The mask is empty when Op and N0
       // already have the same scalar width (e.g. the setcc form accepted by
       // isTruncateOf with i1 operands).
9782     APInt TruncatedBits =
9783       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9784       APInt(Op.getScalarValueSizeInBits(), 0) :
9785       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9786                         N0.getScalarValueSizeInBits(),
9787                         std::min(Op.getScalarValueSizeInBits(),
9788                                  VT.getScalarSizeInBits()));
9789     if (TruncatedBits.isSubsetOf(Known.Zero))
9790       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9791   }
9792
9793   // fold (zext (truncate x)) -> (and x, mask)
9794   if (N0.getOpcode() == ISD::TRUNCATE) {
9795     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9796     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9797     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
         // Remember the truncate's operand before CombineTo may delete the
         // truncate itself.
9798       SDNode *oye = N0.getOperand(0).getNode();
9799       if (NarrowLoad.getNode() != N0.getNode()) {
9800         CombineTo(N0.getNode(), NarrowLoad);
9801         // CombineTo deleted the truncate, if needed, but not what's under it.
9802         AddToWorklist(oye);
9803       }
9804       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9805     }
9806
9807     EVT SrcVT = N0.getOperand(0).getValueType();
9808     EVT MinVT = N0.getValueType();
9809
9810     // Try to mask before the extension to avoid having to generate a larger mask,
9811     // possibly over several sub-vectors.
9812     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9813       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9814                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9815         SDValue Op = N0.getOperand(0);
9816         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9817         AddToWorklist(Op.getNode());
9818         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9819         // Transfer the debug info; the new node is equivalent to N0.
9820         DAG.transferDbgValues(N0, ZExtOrTrunc);
9821         return ZExtOrTrunc;
9822       }
9823     }
9824
       // Otherwise bring x to VT first (any-extend or truncate) and clear
       // everything above MinVT's scalar width in the destination type.
9825     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9826       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9827       AddToWorklist(Op.getNode());
9828       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9829       // We may safely transfer the debug info describing the truncate node over
9830       // to the equivalent and operation.
9831       DAG.transferDbgValues(N0, And);
9832       return And;
9833     }
9834   }
9835
9836   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9837   // if either of the casts is not free.
9838   if (N0.getOpcode() == ISD::AND &&
9839       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9840       N0.getOperand(1).getOpcode() == ISD::Constant &&
9841       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9842                            N0.getValueType()) ||
9843        !TLI.isZExtFree(N0.getValueType(), VT))) {
9844     SDValue X = N0.getOperand(0).getOperand(0);
9845     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
       // The mask is zero-extended, so the bits above the original width are
       // cleared by the new AND.
9846     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9847     Mask = Mask.zext(VT.getSizeInBits());
9848     SDLoc DL(N);
9849     return DAG.getNode(ISD::AND, DL, VT,
9850                        X, DAG.getConstant(Mask, DL, VT));
9851   }
9852
9853   // Try to simplify (zext (load x)).
9854   if (SDValue foldedExt =
9855           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9856                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9857     return foldedExt;
9858
9859   // fold (zext (load x)) to multiple smaller zextloads.
9860   // Only on illegal but splittable vectors.
9861   if (SDValue ExtLoad = CombineExtLoad(N))
9862     return ExtLoad;
9863
9864   // fold (zext (and/or/xor (load x), cst)) ->
9865   //      (and/or/xor (zextload x), (zext cst))
9866   // Unless (and (load x) cst) will match as a zextload already and has
9867   // additional users.
9868   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9869        N0.getOpcode() == ISD::XOR) &&
9870       isa<LoadSDNode>(N0.getOperand(0)) &&
9871       N0.getOperand(1).getOpcode() == ISD::Constant &&
9872       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9873     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9874     EVT MemVT = LN00->getMemoryVT();
       // A sign-extending or indexed load cannot be turned into a plain
       // zextload, so those are rejected here.
9875     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9876         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9877       bool DoXform = true;
9878       SmallVector<SDNode*, 4> SetCCs;
9879       if (!N0.hasOneUse()) {
9880         if (N0.getOpcode() == ISD::AND) {
9881           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9882           EVT LoadResultTy = AndC->getValueType(0);
9883           EVT ExtVT;
9884           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9885             DoXform = false;
9886         }
9887       }
9888       if (DoXform)
9889         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9890                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9891       if (DoXform) {
9892         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9893                                          LN00->getChain(), LN00->getBasePtr(),
9894                                          LN00->getMemoryVT(),
9895                                          LN00->getMemOperand());
9896         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9897         Mask = Mask.zext(VT.getSizeInBits());
9898         SDLoc DL(N);
9899         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9900                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9901         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
           // NOTE: despite their names, NoReplaceTruncAnd is true when N0 has
           // users besides N (those users are rewritten to a truncate of the
           // new logic op below), and NoReplaceTrunc is true when the loaded
           // value has a single use (then only the chain result is rewired).
           // Both use counts are sampled before the CombineTo calls below.
9902         bool NoReplaceTruncAnd = !N0.hasOneUse();
9903         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9904         CombineTo(N, And);
9905         // If N0 has multiple uses, change other uses as well.
9906         if (NoReplaceTruncAnd) {
9907           SDValue TruncAnd =
9908               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9909           CombineTo(N0.getNode(), TruncAnd);
9910         }
9911         if (NoReplaceTrunc) {
9912           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9913         } else {
9914           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9915                                       LN00->getValueType(0), ExtLoad);
9916           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9917         }
9918         return SDValue(N,0); // Return N so it doesn't get rechecked!
9919       }
9920     }
9921   }
9922
9923   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9924   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9925   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9926     return ZExtLoad;
9927
9928   // Try to simplify (zext (zextload x)).
9929   if (SDValue foldedExt = tryToFoldExtOfExtload(
9930           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9931     return foldedExt;
9932
9933   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9934     return V;
9935
9936   if (N0.getOpcode() == ISD::SETCC) {
9937     // Only do this before legalize for now.
9938     if (!LegalOperations && VT.isVector() &&
9939         N0.getValueType().getVectorElementType() == MVT::i1) {
9940       EVT N00VT = N0.getOperand(0).getValueType();
         // The setcc already has the result type reported by
         // getSetCCResultType for its operands; leave the node alone.
9941       if (getSetCCResultType(N00VT) == N0.getValueType())
9942         return SDValue();
9943
9944       // We know that the # elements of the results is the same as the #
9945       // elements of the compare (and the # elements of the compare result for
9946       // that matter). Check to see that they are the same size. If so, we know
9947       // that the element size of the zext'd result matches the element size of
9948       // the compare operands.
9949       SDLoc DL(N);
9950       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9951       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9952         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9953         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9954                                      N0.getOperand(1), N0.getOperand(2));
9955         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9956       }
9957
9958       // If the desired elements are smaller or larger than the source
9959       // elements we can use a matching integer vector type and then
9960       // truncate/sign extend, masking each element down to its low bit.
9961       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9962       SDValue VsetCC =
9963           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9964                       N0.getOperand(1), N0.getOperand(2));
9965       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9966                          VecOnes);
9967     }
9968
9969     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9970     SDLoc DL(N);
9971     if (SDValue SCC = SimplifySelectCC(
9972             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9973             DAG.getConstant(0, DL, VT),
9974             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9975       return SCC;
9976   }
9977
9978   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
9979   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9980       isa<ConstantSDNode>(N0.getOperand(1)) &&
9981       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9982       N0.hasOneUse()) {
9983     SDValue ShAmt = N0.getOperand(1);
9984     if (N0.getOpcode() == ISD::SHL) {
9985       SDValue InnerZExt = N0.getOperand(0);
9986       // If the original shl may be shifting out bits, do not perform this
9987       // transformation.
         // KnownZeroBits is the number of high bits the inner zext added; a
         // shift by more than that bails out of the whole visit.
9988       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9989         InnerZExt.getOperand(0).getValueSizeInBits();
9990       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
9991         return SDValue();
9992     }
9993
9994     SDLoc DL(N);
9995
9996     // Ensure that the shift amount is wide enough for the shifted value.
         // For results of 256 bits or more the amount is re-created as an i32.
9997     if (VT.getSizeInBits() >= 256)
9998       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9999
10000     return DAG.getNode(N0.getOpcode(), DL, VT,
10001                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10002                        ShAmt);
10003   }
10004
10005   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10006     return NewVSel;
10007
10008   return SDValue();
10009 }
10010
/// Try to simplify the ANY_EXTEND node \p N.
///
/// Returns the replacement value when a fold applies, SDValue(N, 0) when the
/// DAG was already updated in place (so that N is not rechecked), and an empty
/// SDValue when nothing was changed.
10011 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10012   SDValue N0 = N->getOperand(0);
10013   EVT VT = N->getValueType(0);
10014
10015   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10016     return Res;
10017
10018   // fold (aext (aext x)) -> (aext x)
10019   // fold (aext (zext x)) -> (zext x)
10020   // fold (aext (sext x)) -> (sext x)
        // The inner extension's opcode is reused for the merged node.
10021   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
10022       N0.getOpcode() == ISD::ZERO_EXTEND ||
10023       N0.getOpcode() == ISD::SIGN_EXTEND)
10024     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10025
10026   // fold (aext (truncate (load x))) -> (aext (smaller load x))
10027   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10028   if (N0.getOpcode() == ISD::TRUNCATE) {
10029     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
          // Remember the truncate's operand before CombineTo may delete the
          // truncate itself.
10030       SDNode *oye = N0.getOperand(0).getNode();
10031       if (NarrowLoad.getNode() != N0.getNode()) {
10032         CombineTo(N0.getNode(), NarrowLoad);
10033         // CombineTo deleted the truncate, if needed, but not what's under it.
10034         AddToWorklist(oye);
10035       }
10036       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10037     }
10038   }
10039
10040   // fold (aext (truncate x))
        // x is any-extended or truncated straight to VT.
10041   if (N0.getOpcode() == ISD::TRUNCATE)
10042     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10043
10044   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10045   // if the trunc is not free.
10046   if (N0.getOpcode() == ISD::AND &&
10047       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10048       N0.getOperand(1).getOpcode() == ISD::Constant &&
10049       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10050                           N0.getValueType())) {
10051     SDLoc DL(N);
10052     SDValue X = N0.getOperand(0).getOperand(0);
10053     X = DAG.getAnyExtOrTrunc(X, DL, VT);
        // The mask is zero-extended to VT's width before it is re-applied.
10054     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
10055     Mask = Mask.zext(VT.getSizeInBits());
10056     return DAG.getNode(ISD::AND, DL, VT,
10057                        X, DAG.getConstant(Mask, DL, VT));
10058   }
10059
10060   // fold (aext (load x)) -> (aext (truncate (extload x)))
10061   // None of the supported targets knows how to perform load and any_ext
10062   // on vectors in one instruction.  We only perform this transformation on
10063   // scalars.
10064   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10065       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10066       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10067     bool DoXform = true;
10068     SmallVector<SDNode*, 4> SetCCs;
        // When the load has other users, ExtendUsesToFormExtLoad decides
        // whether the transform should go ahead and collects the setcc users
        // that are later handed to ExtendSetCCUses.
10069     if (!N0.hasOneUse())
10070       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10071                                         TLI);
10072     if (DoXform) {
10073       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10074       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10075                                        LN0->getChain(),
10076                                        LN0->getBasePtr(), N0.getValueType(),
10077                                        LN0->getMemOperand());
10078       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10079       // If the load value is used only by N, replace it via CombineTo N.
          // The use count is sampled before CombineTo(N, ExtLoad) runs.
10080       bool NoReplaceTrunc = N0.hasOneUse();
10081       CombineTo(N, ExtLoad);
10082       if (NoReplaceTrunc) {
            // Only the chain result of the old load still needs rewiring.
10083         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10084         recursivelyDeleteUnusedNodes(LN0);
10085       } else {
            // Remaining users of the old load get a truncate of the extload.
10086         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10087                                     N0.getValueType(), ExtLoad);
10088         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10089       }
10090       return SDValue(N, 0); // Return N so it doesn't get rechecked!
10091     }
10092   }
10093
10094   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10095   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10096   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
        // Only single-use, unindexed extending loads are handled; the new load
        // keeps the original extension kind and memory type.
10097   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10098       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10099     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10100     ISD::LoadExtType ExtType = LN0->getExtensionType();
10101     EVT MemVT = LN0->getMemoryVT();
10102     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10103       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10104                                        VT, LN0->getChain(), LN0->getBasePtr(),
10105                                        MemVT, LN0->getMemOperand());
10106       CombineTo(N, ExtLoad);
10107       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10108       recursivelyDeleteUnusedNodes(LN0);
10109       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10110     }
10111   }
10112
10113   if (N0.getOpcode() == ISD::SETCC) {
10114     // For vectors:
10115     // aext(setcc) -> vsetcc
10116     // aext(setcc) -> truncate(vsetcc)
10117     // aext(setcc) -> aext(vsetcc)
10118     // Only do this before legalize for now.
10119     if (VT.isVector() && !LegalOperations) {
10120       EVT N00VT = N0.getOperand(0).getValueType();
          // The setcc already has the result type reported by
          // getSetCCResultType for its operands; leave the node alone.
10121       if (getSetCCResultType(N00VT) == N0.getValueType())
10122         return SDValue();
10123
10124       // We know that the # elements of the results is the same as the
10125       // # elements of the compare (and the # elements of the compare result
10126       // for that matter).  Check to see that they are the same size.  If so,
10127       // we know that the element size of the aext'd result matches the
10128       // element size of the compare operands.
10129       if (VT.getSizeInBits() == N00VT.getSizeInBits())
10130         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10131                              N0.getOperand(1),
10132                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
10133
10134       // If the desired elements are smaller or larger than the source
10135       // elements we can use a matching integer vector type and then
10136       // truncate/any extend
10137       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10138       SDValue VsetCC =
10139         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10140                       N0.getOperand(1),
10141                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
10142       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10143     }
10144
10145     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10146     SDLoc DL(N);
10147     if (SDValue SCC = SimplifySelectCC(
10148             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10149             DAG.getConstant(0, DL, VT),
10150             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10151       return SCC;
10152   }
10153
10154   return SDValue();
10155 }
10156
10157 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10158   unsigned Opcode = N->getOpcode();
10159   SDValue N0 = N->getOperand(0);
10160   SDValue N1 = N->getOperand(1);
10161   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10162
10163   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10164   if (N0.getOpcode() == Opcode &&
10165       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10166     return N0;
10167
10168   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10169       N0.getOperand(0).getOpcode() == Opcode) {
10170     // We have an assert, truncate, assert sandwich. Make one stronger assert
10171     // by asserting on the smallest asserted type to the larger source type.
10172     // This eliminates the later assert:
10173     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10174     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10175     SDValue BigA = N0.getOperand(0);
10176     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10177     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10178            "Asserting zero/sign-extended bits to a type larger than the "
10179            "truncated destination does not provide information");
10180
10181     SDLoc DL(N);
10182     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10183     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10184     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10185                                     BigA.getOperand(0), MinAssertVTVal);
10186     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10187   }
10188
10189   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10190   // than X. Just move the AssertZext in front of the truncate and drop the
10191   // AssertSExt.
10192   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10193       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10194       Opcode == ISD::AssertZext) {
10195     SDValue BigA = N0.getOperand(0);
10196     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10197     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10198            "Asserting zero/sign-extended bits to a type larger than the "
10199            "truncated destination does not provide information");
10200
10201     if (AssertVT.bitsLT(BigA_AssertVT)) {
10202       SDLoc DL(N);
10203       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10204                                       BigA.getOperand(0), N1);
10205       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10206     }
10207   }
10208
10209   return SDValue();
10210 }
10211
10212 /// If the result of a wider load is shifted to right of N  bits and then
10213 /// truncated to a narrower type and where N is a multiple of number of bits of
10214 /// the narrower type, transform it to a narrower load from address + N / num of
10215 /// bits of new type. Also narrow the load if the result is masked with an AND
10216 /// to effectively produce a smaller type. If the result is to be extended, also
10217 /// fold the extension to form a extending load.
10218 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10219   unsigned Opc = N->getOpcode();
10220
10221   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10222   SDValue N0 = N->getOperand(0);
10223   EVT VT = N->getValueType(0);
10224   EVT ExtVT = VT;
10225
10226   // This transformation isn't valid for vector loads.
10227   if (VT.isVector())
10228     return SDValue();
10229
10230   unsigned ShAmt = 0;
10231   bool HasShiftedOffset = false;
10232   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10233   // extended to VT.
10234   if (Opc == ISD::SIGN_EXTEND_INREG) {
10235     ExtType = ISD::SEXTLOAD;
10236     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10237   } else if (Opc == ISD::SRL) {
10238     // Another special-case: SRL is basically zero-extending a narrower value,
10239     // or it maybe shifting a higher subword, half or byte into the lowest
10240     // bits.
10241     ExtType = ISD::ZEXTLOAD;
10242     N0 = SDValue(N, 0);
10243
10244     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10245     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10246     if (!N01 || !LN0)
10247       return SDValue();
10248
10249     uint64_t ShiftAmt = N01->getZExtValue();
10250     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10251     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10252       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10253     else
10254       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10255                                 VT.getSizeInBits() - ShiftAmt);
10256   } else if (Opc == ISD::AND) {
10257     // An AND with a constant mask is the same as a truncate + zero-extend.
10258     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10259     if (!AndC)
10260       return SDValue();
10261
10262     const APInt &Mask = AndC->getAPIntValue();
10263     unsigned ActiveBits = 0;
10264     if (Mask.isMask()) {
10265       ActiveBits = Mask.countTrailingOnes();
10266     } else if (Mask.isShiftedMask()) {
10267       ShAmt = Mask.countTrailingZeros();
10268       APInt ShiftedMask = Mask.lshr(ShAmt);
10269       ActiveBits = ShiftedMask.countTrailingOnes();
10270       HasShiftedOffset = true;
10271     } else
10272       return SDValue();
10273
10274     ExtType = ISD::ZEXTLOAD;
10275     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10276   }
10277
10278   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10279     SDValue SRL = N0;
10280     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10281       ShAmt = ConstShift->getZExtValue();
10282       unsigned EVTBits = ExtVT.getSizeInBits();
10283       // Is the shift amount a multiple of size of VT?
10284       if ((ShAmt & (EVTBits-1)) == 0) {
10285         N0 = N0.getOperand(0);
10286         // Is the load width a multiple of size of VT?
10287         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10288           return SDValue();
10289       }
10290
10291       // At this point, we must have a load or else we can't do the transform.
10292       if (!isa<LoadSDNode>(N0)) return SDValue();
10293
10294       auto *LN0 = cast<LoadSDNode>(N0);
10295
10296       // Because a SRL must be assumed to *need* to zero-extend the high bits
10297       // (as opposed to anyext the high bits), we can't combine the zextload
10298       // lowering of SRL and an sextload.
10299       if (LN0->getExtensionType() == ISD::SEXTLOAD)
10300         return SDValue();
10301
10302       // If the shift amount is larger than the input type then we're not
10303       // accessing any of the loaded bytes.  If the load was a zextload/extload
10304       // then the result of the shift+trunc is zero/undef (handled elsewhere).
10305       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10306         return SDValue();
10307
10308       // If the SRL is only used by a masking AND, we may be able to adjust
10309       // the ExtVT to make the AND redundant.
10310       SDNode *Mask = *(SRL->use_begin());
10311       if (Mask->getOpcode() == ISD::AND &&
10312           isa<ConstantSDNode>(Mask->getOperand(1))) {
10313         const APInt &ShiftMask =
10314           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
10315         if (ShiftMask.isMask()) {
10316           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10317                                            ShiftMask.countTrailingOnes());
10318           // If the mask is smaller, recompute the type.
10319           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10320               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10321             ExtVT = MaskedVT;
10322         }
10323       }
10324     }
10325   }
10326
10327   // If the load is shifted left (and the result isn't shifted back right),
10328   // we can fold the truncate through the shift.
10329   unsigned ShLeftAmt = 0;
10330   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10331       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10332     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10333       ShLeftAmt = N01->getZExtValue();
10334       N0 = N0.getOperand(0);
10335     }
10336   }
10337
10338   // If we haven't found a load, we can't narrow it.
10339   if (!isa<LoadSDNode>(N0))
10340     return SDValue();
10341
10342   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10343   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10344     return SDValue();
10345
10346   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10347     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10348     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10349     return LVTStoreBits - EVTStoreBits - ShAmt;
10350   };
10351
10352   // For big endian targets, we need to adjust the offset to the pointer to
10353   // load the correct bytes.
10354   if (DAG.getDataLayout().isBigEndian())
10355     ShAmt = AdjustBigEndianShift(ShAmt);
10356
10357   EVT PtrType = N0.getOperand(1).getValueType();
10358   uint64_t PtrOff = ShAmt / 8;
10359   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10360   SDLoc DL(LN0);
10361   // The original load itself didn't wrap, so an offset within it doesn't.
10362   SDNodeFlags Flags;
10363   Flags.setNoUnsignedWrap(true);
10364   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
10365                                PtrType, LN0->getBasePtr(),
10366                                DAG.getConstant(PtrOff, DL, PtrType),
10367                                Flags);
10368   AddToWorklist(NewPtr.getNode());
10369
10370   SDValue Load;
10371   if (ExtType == ISD::NON_EXTLOAD)
10372     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
10373                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10374                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10375   else
10376     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
10377                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10378                           NewAlign, LN0->getMemOperand()->getFlags(),
10379                           LN0->getAAInfo());
10380
10381   // Replace the old load's chain with the new load's chain.
10382   WorklistRemover DeadNodes(*this);
10383   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10384
10385   // Shift the result left, if we've swallowed a left shift.
10386   SDValue Result = Load;
10387   if (ShLeftAmt != 0) {
10388     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10389     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10390       ShImmTy = VT;
10391     // If the shift amount is as large as the result size (but, presumably,
10392     // no larger than the source) then the useful bits of the result are
10393     // zero; we can't simply return the shortened shift, because the result
10394     // of that operation is undefined.
10395     SDLoc DL(N0);
10396     if (ShLeftAmt >= VT.getSizeInBits())
10397       Result = DAG.getConstant(0, DL, VT);
10398     else
10399       Result = DAG.getNode(ISD::SHL, DL, VT,
10400                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10401   }
10402
10403   if (HasShiftedOffset) {
10404     // Recalculate the shift amount after it has been altered to calculate
10405     // the offset.
10406     if (DAG.getDataLayout().isBigEndian())
10407       ShAmt = AdjustBigEndianShift(ShAmt);
10408
10409     // We're using a shifted mask, so the load now has an offset. This means
10410     // that data has been loaded into the lower bytes than it would have been
10411     // before, so we need to shl the loaded data into the correct position in the
10412     // register.
10413     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10414     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10415     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10416   }
10417
10418   // Return the new loaded value.
10419   return Result;
10420 }
10421
10422 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10423   SDValue N0 = N->getOperand(0);
10424   SDValue N1 = N->getOperand(1);
10425   EVT VT = N->getValueType(0);
10426   EVT EVT = cast<VTSDNode>(N1)->getVT();
10427   unsigned VTBits = VT.getScalarSizeInBits();
10428   unsigned EVTBits = EVT.getScalarSizeInBits();
10429
10430   if (N0.isUndef())
10431     return DAG.getUNDEF(VT);
10432
10433   // fold (sext_in_reg c1) -> c1
10434   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10435     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10436
10437   // If the input is already sign extended, just drop the extension.
10438   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10439     return N0;
10440
10441   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10442   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10443       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10444     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10445                        N0.getOperand(0), N1);
10446
10447   // fold (sext_in_reg (sext x)) -> (sext x)
10448   // fold (sext_in_reg (aext x)) -> (sext x)
10449   // if x is small enough or if we know that x has more than 1 sign bit and the
10450   // sign_extend_inreg is extending from one of them.
10451   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10452     SDValue N00 = N0.getOperand(0);
10453     unsigned N00Bits = N00.getScalarValueSizeInBits();
10454     if ((N00Bits <= EVTBits ||
10455          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10456         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10457       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10458   }
10459
10460   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10461   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10462        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
10463        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
10464       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10465     if (!LegalOperations ||
10466         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
10467       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10468                          N0.getOperand(0));
10469   }
10470
10471   // fold (sext_in_reg (zext x)) -> (sext x)
10472   // iff we are extending the source sign bit.
10473   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10474     SDValue N00 = N0.getOperand(0);
10475     if (N00.getScalarValueSizeInBits() == EVTBits &&
10476         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10477       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10478   }
10479
10480   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10481   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10482     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10483
10484   // fold operands of sext_in_reg based on knowledge that the top bits are not
10485   // demanded.
10486   if (SimplifyDemandedBits(SDValue(N, 0)))
10487     return SDValue(N, 0);
10488
10489   // fold (sext_in_reg (load x)) -> (smaller sextload x)
10490   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10491   if (SDValue NarrowLoad = ReduceLoadWidth(N))
10492     return NarrowLoad;
10493
10494   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10495   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10496   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10497   if (N0.getOpcode() == ISD::SRL) {
10498     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10499       if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10500         // We can turn this into an SRA iff the input to the SRL is already sign
10501         // extended enough.
10502         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10503         if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10504           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10505                              N0.getOperand(1));
10506       }
10507   }
10508
10509   // fold (sext_inreg (extload x)) -> (sextload x)
10510   // If sextload is not supported by target, we can only do the combine when
10511   // load has one use. Doing otherwise can block folding the extload with other
10512   // extends that the target does support.
10513   if (ISD::isEXTLoad(N0.getNode()) &&
10514       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10515       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10516       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
10517         N0.hasOneUse()) ||
10518        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10519     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10520     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10521                                      LN0->getChain(),
10522                                      LN0->getBasePtr(), EVT,
10523                                      LN0->getMemOperand());
10524     CombineTo(N, ExtLoad);
10525     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10526     AddToWorklist(ExtLoad.getNode());
10527     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10528   }
10529   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10530   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10531       N0.hasOneUse() &&
10532       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10533       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10534        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10535     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10536     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10537                                      LN0->getChain(),
10538                                      LN0->getBasePtr(), EVT,
10539                                      LN0->getMemOperand());
10540     CombineTo(N, ExtLoad);
10541     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10542     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10543   }
10544
10545   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10546   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10547     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10548                                            N0.getOperand(1), false))
10549       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10550                          BSwap, N1);
10551   }
10552
10553   return SDValue();
10554 }
10555
10556 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10557   SDValue N0 = N->getOperand(0);
10558   EVT VT = N->getValueType(0);
10559
10560   if (N0.isUndef())
10561     return DAG.getUNDEF(VT);
10562
10563   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10564     return Res;
10565
10566   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10567     return SDValue(N, 0);
10568
10569   return SDValue();
10570 }
10571
10572 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10573   SDValue N0 = N->getOperand(0);
10574   EVT VT = N->getValueType(0);
10575
10576   if (N0.isUndef())
10577     return DAG.getUNDEF(VT);
10578
10579   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10580     return Res;
10581
10582   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10583     return SDValue(N, 0);
10584
10585   return SDValue();
10586 }
10587
10588 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10589   SDValue N0 = N->getOperand(0);
10590   EVT VT = N->getValueType(0);
10591   EVT SrcVT = N0.getValueType();
10592   bool isLE = DAG.getDataLayout().isLittleEndian();
10593
10594   // noop truncate
10595   if (SrcVT == VT)
10596     return N0;
10597
10598   // fold (truncate (truncate x)) -> (truncate x)
10599   if (N0.getOpcode() == ISD::TRUNCATE)
10600     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10601
10602   // fold (truncate c1) -> c1
10603   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10604     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10605     if (C.getNode() != N)
10606       return C;
10607   }
10608
10609   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10610   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10611       N0.getOpcode() == ISD::SIGN_EXTEND ||
10612       N0.getOpcode() == ISD::ANY_EXTEND) {
10613     // if the source is smaller than the dest, we still need an extend.
10614     if (N0.getOperand(0).getValueType().bitsLT(VT))
10615       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10616     // if the source is larger than the dest, than we just need the truncate.
10617     if (N0.getOperand(0).getValueType().bitsGT(VT))
10618       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10619     // if the source and dest are the same type, we can drop both the extend
10620     // and the truncate.
10621     return N0.getOperand(0);
10622   }
10623
10624   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10625   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10626     return SDValue();
10627
10628   // Fold extract-and-trunc into a narrow extract. For example:
10629   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10630   //   i32 y = TRUNCATE(i64 x)
10631   //        -- becomes --
10632   //   v16i8 b = BITCAST (v2i64 val)
10633   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10634   //
10635   // Note: We only run this optimization after type legalization (which often
10636   // creates this pattern) and before operation legalization after which
10637   // we need to be more careful about the vector instructions that we generate.
10638   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10639       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10640     EVT VecTy = N0.getOperand(0).getValueType();
10641     EVT ExTy = N0.getValueType();
10642     EVT TrTy = N->getValueType(0);
10643
10644     unsigned NumElem = VecTy.getVectorNumElements();
10645     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10646
10647     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10648     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10649
10650     SDValue EltNo = N0->getOperand(1);
10651     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10652       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10653       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10654       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10655
10656       SDLoc DL(N);
10657       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10658                          DAG.getBitcast(NVT, N0.getOperand(0)),
10659                          DAG.getConstant(Index, DL, IndexTy));
10660     }
10661   }
10662
10663   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10664   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10665     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10666         TLI.isTruncateFree(SrcVT, VT)) {
10667       SDLoc SL(N0);
10668       SDValue Cond = N0.getOperand(0);
10669       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10670       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10671       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10672     }
10673   }
10674
10675   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
10676   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10677       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10678       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10679     SDValue Amt = N0.getOperand(1);
10680     KnownBits Known = DAG.computeKnownBits(Amt);
10681     unsigned Size = VT.getScalarSizeInBits();
10682     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10683       SDLoc SL(N);
10684       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10685
10686       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10687       if (AmtVT != Amt.getValueType()) {
10688         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10689         AddToWorklist(Amt.getNode());
10690       }
10691       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10692     }
10693   }
10694
10695   // Attempt to pre-truncate BUILD_VECTOR sources.
10696   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10697       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10698     SDLoc DL(N);
10699     EVT SVT = VT.getScalarType();
10700     SmallVector<SDValue, 8> TruncOps;
10701     for (const SDValue &Op : N0->op_values()) {
10702       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10703       TruncOps.push_back(TruncOp);
10704     }
10705     return DAG.getBuildVector(VT, DL, TruncOps);
10706   }
10707
10708   // Fold a series of buildvector, bitcast, and truncate if possible.
10709   // For example fold
10710   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10711   //   (2xi32 (buildvector x, y)).
10712   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10713       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10714       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10715       N0.getOperand(0).hasOneUse()) {
10716     SDValue BuildVect = N0.getOperand(0);
10717     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10718     EVT TruncVecEltTy = VT.getVectorElementType();
10719
10720     // Check that the element types match.
10721     if (BuildVectEltTy == TruncVecEltTy) {
10722       // Now we only need to compute the offset of the truncated elements.
10723       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
10724       unsigned TruncVecNumElts = VT.getVectorNumElements();
10725       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10726
10727       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10728              "Invalid number of elements");
10729
10730       SmallVector<SDValue, 8> Opnds;
10731       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10732         Opnds.push_back(BuildVect.getOperand(i));
10733
10734       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10735     }
10736   }
10737
10738   // See if we can simplify the input to this truncate through knowledge that
10739   // only the low bits are being used.
10740   // For example "trunc (or (shl x, 8), y)" // -> trunc y
10741   // Currently we only perform this optimization on scalars because vectors
10742   // may have different active low bits.
10743   if (!VT.isVector()) {
10744     APInt Mask =
10745         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10746     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10747       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10748   }
10749
10750   // fold (truncate (load x)) -> (smaller load x)
10751   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10752   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10753     if (SDValue Reduced = ReduceLoadWidth(N))
10754       return Reduced;
10755
10756     // Handle the case where the load remains an extending load even
10757     // after truncation.
10758     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10759       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10760       if (!LN0->isVolatile() &&
10761           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10762         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10763                                          VT, LN0->getChain(), LN0->getBasePtr(),
10764                                          LN0->getMemoryVT(),
10765                                          LN0->getMemOperand());
10766         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10767         return NewLoad;
10768       }
10769     }
10770   }
10771
10772   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
10773   // where ... are all 'undef'.
10774   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10775     SmallVector<EVT, 8> VTs;
10776     SDValue V;
10777     unsigned Idx = 0;
10778     unsigned NumDefs = 0;
10779
10780     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10781       SDValue X = N0.getOperand(i);
10782       if (!X.isUndef()) {
10783         V = X;
10784         Idx = i;
10785         NumDefs++;
10786       }
10787       // Stop if more than one members are non-undef.
10788       if (NumDefs > 1)
10789         break;
10790       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10791                                      VT.getVectorElementType(),
10792                                      X.getValueType().getVectorNumElements()));
10793     }
10794
10795     if (NumDefs == 0)
10796       return DAG.getUNDEF(VT);
10797
10798     if (NumDefs == 1) {
10799       assert(V.getNode() && "The single defined operand is empty!");
10800       SmallVector<SDValue, 8> Opnds;
10801       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10802         if (i != Idx) {
10803           Opnds.push_back(DAG.getUNDEF(VTs[i]));
10804           continue;
10805         }
10806         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10807         AddToWorklist(NV.getNode());
10808         Opnds.push_back(NV);
10809       }
10810       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10811     }
10812   }
10813
10814   // Fold truncate of a bitcast of a vector to an extract of the low vector
10815   // element.
10816   //
10817   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10818   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10819     SDValue VecSrc = N0.getOperand(0);
10820     EVT SrcVT = VecSrc.getValueType();
10821     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10822         (!LegalOperations ||
10823          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10824       SDLoc SL(N);
10825
10826       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10827       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10828       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10829                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10830     }
10831   }
10832
10833   // Simplify the operands using demanded-bits information.
10834   if (!VT.isVector() &&
10835       SimplifyDemandedBits(SDValue(N, 0)))
10836     return SDValue(N, 0);
10837
10838   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10839   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10840   // When the adde's carry is not used.
10841   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10842       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
10843       // We only do for addcarry before legalize operation
10844       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10845        TLI.isOperationLegal(N0.getOpcode(), VT))) {
10846     SDLoc SL(N);
10847     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10848     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10849     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10850     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10851   }
10852
10853   // fold (truncate (extract_subvector(ext x))) ->
10854   //      (extract_subvector x)
10855   // TODO: This can be generalized to cover cases where the truncate and extract
10856   // do not fully cancel each other out.
10857   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10858     SDValue N00 = N0.getOperand(0);
10859     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10860         N00.getOpcode() == ISD::ZERO_EXTEND ||
10861         N00.getOpcode() == ISD::ANY_EXTEND) {
10862       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10863           VT.getVectorElementType())
10864         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10865                            N00.getOperand(0), N0.getOperand(1));
10866     }
10867   }
10868
10869   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10870     return NewVSel;
10871
10872   // Narrow a suitable binary operation with a non-opaque constant operand by
10873   // moving it ahead of the truncate. This is limited to pre-legalization
10874   // because targets may prefer a wider type during later combines and invert
10875   // this transform.
10876   switch (N0.getOpcode()) {
10877   case ISD::ADD:
10878   case ISD::SUB:
10879   case ISD::MUL:
10880   case ISD::AND:
10881   case ISD::OR:
10882   case ISD::XOR:
10883     if (!LegalOperations && N0.hasOneUse() &&
10884         (isConstantOrConstantVector(N0.getOperand(0), true) ||
10885          isConstantOrConstantVector(N0.getOperand(1), true))) {
10886       // TODO: We already restricted this to pre-legalization, but for vectors
10887       // we are extra cautious to not create an unsupported operation.
10888       // Target-specific changes are likely needed to avoid regressions here.
10889       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10890         SDLoc DL(N);
10891         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10892         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10893         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10894       }
10895     }
10896   }
10897
10898   return SDValue();
10899 }
10900
10901 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10902   SDValue Elt = N->getOperand(i);
10903   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10904     return Elt.getNode();
10905   return Elt.getOperand(Elt.getResNo()).getNode();
10906 }
10907
10908 /// build_pair (load, load) -> load
10909 /// if load locations are consecutive.
10910 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10911   assert(N->getOpcode() == ISD::BUILD_PAIR);
10912
10913   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10914   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10915
10916   // A BUILD_PAIR is always having the least significant part in elt 0 and the
10917   // most significant part in elt 1. So when combining into one large load, we
10918   // need to consider the endianness.
10919   if (DAG.getDataLayout().isBigEndian())
10920     std::swap(LD1, LD2);
10921
10922   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10923       LD1->getAddressSpace() != LD2->getAddressSpace())
10924     return SDValue();
10925   EVT LD1VT = LD1->getValueType(0);
10926   unsigned LD1Bytes = LD1VT.getStoreSize();
10927   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10928       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10929     unsigned Align = LD1->getAlignment();
10930     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10931         VT.getTypeForEVT(*DAG.getContext()));
10932
10933     if (NewAlign <= Align &&
10934         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10935       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10936                          LD1->getPointerInfo(), Align);
10937   }
10938
10939   return SDValue();
10940 }
10941
10942 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10943   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10944   // and Lo parts; on big-endian machines it doesn't.
10945   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10946 }
10947
10948 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10949                                     const TargetLowering &TLI) {
10950   // If this is not a bitcast to an FP type or if the target doesn't have
10951   // IEEE754-compliant FP logic, we're done.
10952   EVT VT = N->getValueType(0);
10953   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10954     return SDValue();
10955
10956   // TODO: Handle cases where the integer constant is a different scalar
10957   // bitwidth to the FP.
10958   SDValue N0 = N->getOperand(0);
10959   EVT SourceVT = N0.getValueType();
10960   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10961     return SDValue();
10962
10963   unsigned FPOpcode;
10964   APInt SignMask;
10965   switch (N0.getOpcode()) {
10966   case ISD::AND:
10967     FPOpcode = ISD::FABS;
10968     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10969     break;
10970   case ISD::XOR:
10971     FPOpcode = ISD::FNEG;
10972     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10973     break;
10974   case ISD::OR:
10975     FPOpcode = ISD::FABS;
10976     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10977     break;
10978   default:
10979     return SDValue();
10980   }
10981
10982   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10983   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10984   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10985   //   fneg (fabs X)
10986   SDValue LogicOp0 = N0.getOperand(0);
10987   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10988   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10989       LogicOp0.getOpcode() == ISD::BITCAST &&
10990       LogicOp0.getOperand(0).getValueType() == VT) {
10991     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10992     NumFPLogicOpsConv++;
10993     if (N0.getOpcode() == ISD::OR)
10994       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10995     return FPOp;
10996   }
10997
10998   return SDValue();
10999 }
11000
/// Combine a BITCAST node. The folds below are tried in order and the first
/// one that applies determines the result; an empty SDValue is returned when
/// none of them does.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // A bitcast of undef is undef of the destination type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // maybe depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      // Only report a combine if the result is a node other than N itself.
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);

    // Let the target veto the retyped load.
    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
                                    *LN0->getMemOperand())) {
      // Re-issue the load with the bitcast's result type, keeping the original
      // chain, pointer, alignment, flags and AA info.
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), LN0->getAlignment(),
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect users of result 1 of the old load to result 1 of the new one.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // Sign-bit logic on a bitcast FP value that is cast back to FP can become
  // fabs/fneg directly.
  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    // Bitcast the FP operand directly; the sign manipulation is redone on the
    // integer value below.
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      // Sign mask for one 64-bit half of the 128-bit value.
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        // fneg: unconditionally flip the sign bit of both halves.
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // fabs: flip both halves only if the Hi half currently has its sign
        // bit set.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      // Apply the same flip mask to both 64-bit halves.
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    // Generic scalar case: one sign bit at the top of the integer value.
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    // Integer type with the same width as the sign operand.
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        // Note: this X shadows the outer X and is a fresh bitcast of the sign
        // operand straight to VT.
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        // The xor has its sign bit set exactly where cst and x disagree.
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        // Flip the sign of both halves of the constant when the signs differ.
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: keep only the sign bit of X ...
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      // ... everything but the sign bit of the constant ...
      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      // ... and merge the two.
      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    // Returns an empty SDValue for any other operand.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    // Both shuffle inputs must be convertible, otherwise give up on N.
    if (!(SV0 && SV1))
      return SDValue();

    // Each element of the original shuffle covers MaskScale elements of VT,
    // so expand every mask entry into MaskScale consecutive entries (undef
    // entries stay undef).
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the mask is not legal as is, retry with the operands commuted.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
11253
11254 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11255   EVT VT = N->getValueType(0);
11256   return CombineConsecutiveLoads(N, VT);
11257 }
11258
11259 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11260 /// operands. DstEltVT indicates the destination element value type.
11261 SDValue DAGCombiner::
11262 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11263   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11264
11265   // If this is already the right type, we're done.
11266   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11267
11268   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11269   unsigned DstBitSize = DstEltVT.getSizeInBits();
11270
11271   // If this is a conversion of N elements of one type to N elements of another
11272   // type, convert each element.  This handles FP<->INT cases.
11273   if (SrcBitSize == DstBitSize) {
11274     SmallVector<SDValue, 8> Ops;
11275     for (SDValue Op : BV->op_values()) {
11276       // If the vector element type is not legal, the BUILD_VECTOR operands
11277       // are promoted and implicitly truncated.  Make that explicit here.
11278       if (Op.getValueType() != SrcEltVT)
11279         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11280       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11281       AddToWorklist(Ops.back().getNode());
11282     }
11283     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11284                               BV->getValueType(0).getVectorNumElements());
11285     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11286   }
11287
11288   // Otherwise, we're growing or shrinking the elements.  To avoid having to
11289   // handle annoying details of growing/shrinking FP values, we convert them to
11290   // int first.
11291   if (SrcEltVT.isFloatingPoint()) {
11292     // Convert the input float vector to a int vector where the elements are the
11293     // same sizes.
11294     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11295     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11296     SrcEltVT = IntVT;
11297   }
11298
11299   // Now we know the input is an integer vector.  If the output is a FP type,
11300   // convert to integer first, then to FP of the right size.
11301   if (DstEltVT.isFloatingPoint()) {
11302     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11303     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11304
11305     // Next, convert to FP elements of the same size.
11306     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11307   }
11308
11309   SDLoc DL(BV);
11310
11311   // Okay, we know the src/dst types are both integers of differing types.
11312   // Handling growing first.
11313   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11314   if (SrcBitSize < DstBitSize) {
11315     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11316
11317     SmallVector<SDValue, 8> Ops;
11318     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11319          i += NumInputsPerOutput) {
11320       bool isLE = DAG.getDataLayout().isLittleEndian();
11321       APInt NewBits = APInt(DstBitSize, 0);
11322       bool EltIsUndef = true;
11323       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11324         // Shift the previously computed bits over.
11325         NewBits <<= SrcBitSize;
11326         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11327         if (Op.isUndef()) continue;
11328         EltIsUndef = false;
11329
11330         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11331                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
11332       }
11333
11334       if (EltIsUndef)
11335         Ops.push_back(DAG.getUNDEF(DstEltVT));
11336       else
11337         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11338     }
11339
11340     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11341     return DAG.getBuildVector(VT, DL, Ops);
11342   }
11343
11344   // Finally, this must be the case where we are shrinking elements: each input
11345   // turns into multiple outputs.
11346   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11347   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11348                             NumOutputsPerInput*BV->getNumOperands());
11349   SmallVector<SDValue, 8> Ops;
11350
11351   for (const SDValue &Op : BV->op_values()) {
11352     if (Op.isUndef()) {
11353       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11354       continue;
11355     }
11356
11357     APInt OpVal = cast<ConstantSDNode>(Op)->
11358                   getAPIntValue().zextOrTrunc(SrcBitSize);
11359
11360     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11361       APInt ThisVal = OpVal.trunc(DstBitSize);
11362       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11363       OpVal.lshrInPlace(DstBitSize);
11364     }
11365
11366     // For big endian targets, swap the order of the pieces of each element.
11367     if (DAG.getDataLayout().isBigEndian())
11368       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11369   }
11370
11371   return DAG.getBuildVector(VT, DL, Ops);
11372 }
11373
11374 static bool isContractable(SDNode *N) {
11375   SDNodeFlags F = N->getFlags();
11376   return F.hasAllowContract() || F.hasAllowReassociation();
11377 }
11378
/// Try to perform FMA combining on a given FADD node.
///
/// Looks for an FMUL (possibly behind an FP_EXTEND or nested inside another
/// fused node) feeding the FADD \p N and, when fusion is permitted, rewrites
/// the pattern into FMAD/FMA nodes. Returns the new node, or an empty SDValue
/// if no fusion was performed.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  // CanFuse: this node may be fused because of unsafe-fp-math or its own
  // contract/reassociation flags.
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  // AllowFusionGlobally: fusion is allowed without consulting the flags of
  // the individual FMUL nodes.
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Do nothing here if generateFMAsInMachineCombiner() returns true for this
  // optimization level.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // Unless the target asked for aggressive fusion, an FMUL is only fused when
  // this FADD is its sole user.

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }


    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    // Helper that builds the replacement; shared with the commuted form below.
    auto FoldFAddFMAFPExtFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    // Helper that builds the replacement; shared with the commuted form below.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}
11590
11591 /// Try to perform FMA combining on a given FSUB node.
11592 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11593   SDValue N0 = N->getOperand(0);
11594   SDValue N1 = N->getOperand(1);
11595   EVT VT = N->getValueType(0);
11596   SDLoc SL(N);
11597
11598   const TargetOptions &Options = DAG.getTarget().Options;
11599   // Floating-point multiply-add with intermediate rounding.
11600   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11601
11602   // Floating-point multiply-add without intermediate rounding.
11603   bool HasFMA =
11604       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11605       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11606
11607   // No valid opcode, do not combine.
11608   if (!HasFMAD && !HasFMA)
11609     return SDValue();
11610
11611   const SDNodeFlags Flags = N->getFlags();
11612   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11613   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11614                               CanFuse || HasFMAD);
11615
11616   // If the subtraction is not contractable, do not combine.
11617   if (!AllowFusionGlobally && !isContractable(N))
11618     return SDValue();
11619
11620   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11621   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11622     return SDValue();
11623
11624   // Always prefer FMAD to FMA for precision.
11625   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11626   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11627
11628   // Is the node an FMUL and contractable either due to global flags or
11629   // SDNodeFlags.
11630   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11631     if (N.getOpcode() != ISD::FMUL)
11632       return false;
11633     return AllowFusionGlobally || isContractable(N.getNode());
11634   };
11635
11636   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11637   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11638     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11639                        N0.getOperand(0), N0.getOperand(1),
11640                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11641   }
11642
11643   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11644   // Note: Commutes FSUB operands.
11645   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11646     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11647                        DAG.getNode(ISD::FNEG, SL, VT,
11648                                    N1.getOperand(0)),
11649                        N1.getOperand(1), N0, Flags);
11650   }
11651
11652   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
11653   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11654       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11655     SDValue N00 = N0.getOperand(0).getOperand(0);
11656     SDValue N01 = N0.getOperand(0).getOperand(1);
11657     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11658                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11659                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11660   }
11661
11662   // Look through FP_EXTEND nodes to do more combining.
11663
11664   // fold (fsub (fpext (fmul x, y)), z)
11665   //   -> (fma (fpext x), (fpext y), (fneg z))
11666   if (N0.getOpcode() == ISD::FP_EXTEND) {
11667     SDValue N00 = N0.getOperand(0);
11668     if (isContractableFMUL(N00) &&
11669         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11670       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11671                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11672                                      N00.getOperand(0)),
11673                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11674                                      N00.getOperand(1)),
11675                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11676     }
11677   }
11678
11679   // fold (fsub x, (fpext (fmul y, z)))
11680   //   -> (fma (fneg (fpext y)), (fpext z), x)
11681   // Note: Commutes FSUB operands.
11682   if (N1.getOpcode() == ISD::FP_EXTEND) {
11683     SDValue N10 = N1.getOperand(0);
11684     if (isContractableFMUL(N10) &&
11685         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11686       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11687                          DAG.getNode(ISD::FNEG, SL, VT,
11688                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
11689                                                  N10.getOperand(0))),
11690                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11691                                      N10.getOperand(1)),
11692                          N0, Flags);
11693     }
11694   }
11695
11696   // fold (fsub (fpext (fneg (fmul, x, y))), z)
11697   //   -> (fneg (fma (fpext x), (fpext y), z))
11698   // Note: This could be removed with appropriate canonicalization of the
11699   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11700   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11701   // from implementing the canonicalization in visitFSUB.
11702   if (N0.getOpcode() == ISD::FP_EXTEND) {
11703     SDValue N00 = N0.getOperand(0);
11704     if (N00.getOpcode() == ISD::FNEG) {
11705       SDValue N000 = N00.getOperand(0);
11706       if (isContractableFMUL(N000) &&
11707           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11708         return DAG.getNode(ISD::FNEG, SL, VT,
11709                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11710                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11711                                                    N000.getOperand(0)),
11712                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11713                                                    N000.getOperand(1)),
11714                                        N1, Flags));
11715       }
11716     }
11717   }
11718
11719   // fold (fsub (fneg (fpext (fmul, x, y))), z)
11720   //   -> (fneg (fma (fpext x)), (fpext y), z)
11721   // Note: This could be removed with appropriate canonicalization of the
11722   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11723   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11724   // from implementing the canonicalization in visitFSUB.
11725   if (N0.getOpcode() == ISD::FNEG) {
11726     SDValue N00 = N0.getOperand(0);
11727     if (N00.getOpcode() == ISD::FP_EXTEND) {
11728       SDValue N000 = N00.getOperand(0);
11729       if (isContractableFMUL(N000) &&
11730           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11731         return DAG.getNode(ISD::FNEG, SL, VT,
11732                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11733                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11734                                                    N000.getOperand(0)),
11735                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11736                                                    N000.getOperand(1)),
11737                                        N1, Flags));
11738       }
11739     }
11740   }
11741
11742   // More folding opportunities when target permits.
11743   if (Aggressive) {
11744     // fold (fsub (fma x, y, (fmul u, v)), z)
11745     //   -> (fma x, y (fma u, v, (fneg z)))
11746     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11747         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11748         N0.getOperand(2)->hasOneUse()) {
11749       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11750                          N0.getOperand(0), N0.getOperand(1),
11751                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11752                                      N0.getOperand(2).getOperand(0),
11753                                      N0.getOperand(2).getOperand(1),
11754                                      DAG.getNode(ISD::FNEG, SL, VT,
11755                                                  N1), Flags), Flags);
11756     }
11757
11758     // fold (fsub x, (fma y, z, (fmul u, v)))
11759     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
11760     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11761         isContractableFMUL(N1.getOperand(2))) {
11762       SDValue N20 = N1.getOperand(2).getOperand(0);
11763       SDValue N21 = N1.getOperand(2).getOperand(1);
11764       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11765                          DAG.getNode(ISD::FNEG, SL, VT,
11766                                      N1.getOperand(0)),
11767                          N1.getOperand(1),
11768                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11769                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
11770                                      N21, N0, Flags), Flags);
11771     }
11772
11773
11774     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11775     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
11776     if (N0.getOpcode() == PreferredFusedOpcode) {
11777       SDValue N02 = N0.getOperand(2);
11778       if (N02.getOpcode() == ISD::FP_EXTEND) {
11779         SDValue N020 = N02.getOperand(0);
11780         if (isContractableFMUL(N020) &&
11781             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11782           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11783                              N0.getOperand(0), N0.getOperand(1),
11784                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11785                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11786                                                      N020.getOperand(0)),
11787                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11788                                                      N020.getOperand(1)),
11789                                          DAG.getNode(ISD::FNEG, SL, VT,
11790                                                      N1), Flags), Flags);
11791         }
11792       }
11793     }
11794
11795     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11796     //   -> (fma (fpext x), (fpext y),
11797     //           (fma (fpext u), (fpext v), (fneg z)))
11798     // FIXME: This turns two single-precision and one double-precision
11799     // operation into two double-precision operations, which might not be
11800     // interesting for all targets, especially GPUs.
11801     if (N0.getOpcode() == ISD::FP_EXTEND) {
11802       SDValue N00 = N0.getOperand(0);
11803       if (N00.getOpcode() == PreferredFusedOpcode) {
11804         SDValue N002 = N00.getOperand(2);
11805         if (isContractableFMUL(N002) &&
11806             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11807           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11808                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11809                                          N00.getOperand(0)),
11810                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11811                                          N00.getOperand(1)),
11812                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11813                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11814                                                      N002.getOperand(0)),
11815                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11816                                                      N002.getOperand(1)),
11817                                          DAG.getNode(ISD::FNEG, SL, VT,
11818                                                      N1), Flags), Flags);
11819         }
11820       }
11821     }
11822
11823     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11824     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11825     if (N1.getOpcode() == PreferredFusedOpcode &&
11826         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11827       SDValue N120 = N1.getOperand(2).getOperand(0);
11828       if (isContractableFMUL(N120) &&
11829           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11830         SDValue N1200 = N120.getOperand(0);
11831         SDValue N1201 = N120.getOperand(1);
11832         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11833                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11834                            N1.getOperand(1),
11835                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11836                                        DAG.getNode(ISD::FNEG, SL, VT,
11837                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11838                                                                VT, N1200)),
11839                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11840                                                    N1201),
11841                                        N0, Flags), Flags);
11842       }
11843     }
11844
11845     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11846     //   -> (fma (fneg (fpext y)), (fpext z),
11847     //           (fma (fneg (fpext u)), (fpext v), x))
11848     // FIXME: This turns two single-precision and one double-precision
11849     // operation into two double-precision operations, which might not be
11850     // interesting for all targets, especially GPUs.
11851     if (N1.getOpcode() == ISD::FP_EXTEND &&
11852         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11853       SDValue CvtSrc = N1.getOperand(0);
11854       SDValue N100 = CvtSrc.getOperand(0);
11855       SDValue N101 = CvtSrc.getOperand(1);
11856       SDValue N102 = CvtSrc.getOperand(2);
11857       if (isContractableFMUL(N102) &&
11858           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11859         SDValue N1020 = N102.getOperand(0);
11860         SDValue N1021 = N102.getOperand(1);
11861         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11862                            DAG.getNode(ISD::FNEG, SL, VT,
11863                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11864                                                    N100)),
11865                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11866                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11867                                        DAG.getNode(ISD::FNEG, SL, VT,
11868                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11869                                                                VT, N1020)),
11870                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11871                                                    N1021),
11872                                        N0, Flags), Flags);
11873       }
11874     }
11875   }
11876
11877   return SDValue();
11878 }
11879
11880 /// Try to perform FMA combining on a given FMUL node based on the distributive
11881 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11882 /// subtraction instead of addition).
11883 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11884   SDValue N0 = N->getOperand(0);
11885   SDValue N1 = N->getOperand(1);
11886   EVT VT = N->getValueType(0);
11887   SDLoc SL(N);
11888   const SDNodeFlags Flags = N->getFlags();
11889
11890   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11891
11892   const TargetOptions &Options = DAG.getTarget().Options;
11893
11894   // The transforms below are incorrect when x == 0 and y == inf, because the
11895   // intermediate multiplication produces a nan.
11896   if (!Options.NoInfsFPMath)
11897     return SDValue();
11898
11899   // Floating-point multiply-add without intermediate rounding.
11900   bool HasFMA =
11901       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11902       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11903       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11904
11905   // Floating-point multiply-add with intermediate rounding. This can result
11906   // in a less precise result due to the changed rounding order.
11907   bool HasFMAD = Options.UnsafeFPMath &&
11908                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11909
11910   // No valid opcode, do not combine.
11911   if (!HasFMAD && !HasFMA)
11912     return SDValue();
11913
11914   // Always prefer FMAD to FMA for precision.
11915   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11916   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11917
11918   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11919   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11920   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11921     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11922       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11923         if (C->isExactlyValue(+1.0))
11924           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11925                              Y, Flags);
11926         if (C->isExactlyValue(-1.0))
11927           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11928                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11929       }
11930     }
11931     return SDValue();
11932   };
11933
11934   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11935     return FMA;
11936   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11937     return FMA;
11938
11939   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11940   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11941   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11942   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11943   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11944     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11945       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11946         if (C0->isExactlyValue(+1.0))
11947           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11948                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11949                              Y, Flags);
11950         if (C0->isExactlyValue(-1.0))
11951           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11952                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11953                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11954       }
11955       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11956         if (C1->isExactlyValue(+1.0))
11957           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11958                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11959         if (C1->isExactlyValue(-1.0))
11960           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11961                              Y, Flags);
11962       }
11963     }
11964     return SDValue();
11965   };
11966
11967   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11968     return FMA;
11969   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11970     return FMA;
11971
11972   return SDValue();
11973 }
11974
11975 SDValue DAGCombiner::visitFADD(SDNode *N) {
11976   SDValue N0 = N->getOperand(0);
11977   SDValue N1 = N->getOperand(1);
11978   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11979   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11980   EVT VT = N->getValueType(0);
11981   SDLoc DL(N);
11982   const TargetOptions &Options = DAG.getTarget().Options;
11983   const SDNodeFlags Flags = N->getFlags();
11984
11985   // fold vector ops
11986   if (VT.isVector())
11987     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11988       return FoldedVOp;
11989
11990   // fold (fadd c1, c2) -> c1 + c2
11991   if (N0CFP && N1CFP)
11992     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11993
11994   // canonicalize constant to RHS
11995   if (N0CFP && !N1CFP)
11996     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11997
11998   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11999   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12000   if (N1C && N1C->isZero())
12001     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
12002       return N0;
12003
12004   if (SDValue NewSel = foldBinOpIntoSelect(N))
12005     return NewSel;
12006
12007   // fold (fadd A, (fneg B)) -> (fsub A, B)
12008   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12009       isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
12010     return DAG.getNode(ISD::FSUB, DL, VT, N0,
12011                        GetNegatedExpression(N1, DAG, LegalOperations,
12012                                             ForCodeSize), Flags);
12013
12014   // fold (fadd (fneg A), B) -> (fsub B, A)
12015   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12016       isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
12017     return DAG.getNode(ISD::FSUB, DL, VT, N1,
12018                        GetNegatedExpression(N0, DAG, LegalOperations,
12019                                             ForCodeSize), Flags);
12020
12021   auto isFMulNegTwo = [](SDValue FMul) {
12022     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12023       return false;
12024     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12025     return C && C->isExactlyValue(-2.0);
12026   };
12027
12028   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
12029   if (isFMulNegTwo(N0)) {
12030     SDValue B = N0.getOperand(0);
12031     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12032     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12033   }
12034   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12035   if (isFMulNegTwo(N1)) {
12036     SDValue B = N1.getOperand(0);
12037     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12038     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12039   }
12040
12041   // No FP constant should be created after legalization as Instruction
12042   // Selection pass has a hard time dealing with FP constants.
12043   bool AllowNewConst = (Level < AfterLegalizeDAG);
12044
12045   // If nnan is enabled, fold lots of things.
12046   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12047     // If allowed, fold (fadd (fneg x), x) -> 0.0
12048     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12049       return DAG.getConstantFP(0.0, DL, VT);
12050
12051     // If allowed, fold (fadd x, (fneg x)) -> 0.0
12052     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12053       return DAG.getConstantFP(0.0, DL, VT);
12054   }
12055
12056   // If 'unsafe math' or reassoc and nsz, fold lots of things.
12057   // TODO: break out portions of the transformations below for which Unsafe is
12058   //       considered and which do not require both nsz and reassoc
12059   if ((Options.UnsafeFPMath ||
12060        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12061       AllowNewConst) {
12062     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12063     if (N1CFP && N0.getOpcode() == ISD::FADD &&
12064         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12065       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12066       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12067     }
12068
12069     // We can fold chains of FADD's of the same value into multiplications.
12070     // This transform is not safe in general because we are reducing the number
12071     // of rounding steps.
12072     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12073       if (N0.getOpcode() == ISD::FMUL) {
12074         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12075         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
12076
12077         // (fadd (fmul x, c), x) -> (fmul x, c+1)
12078         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12079           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12080                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12081           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12082         }
12083
12084         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12085         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12086             N1.getOperand(0) == N1.getOperand(1) &&
12087             N0.getOperand(0) == N1.getOperand(0)) {
12088           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12089                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12090           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12091         }
12092       }
12093
12094       if (N1.getOpcode() == ISD::FMUL) {
12095         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12096         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12097
12098         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12099         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12100           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12101                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12102           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12103         }
12104
12105         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12106         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12107             N0.getOperand(0) == N0.getOperand(1) &&
12108             N1.getOperand(0) == N0.getOperand(0)) {
12109           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12110                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12111           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12112         }
12113       }
12114
12115       if (N0.getOpcode() == ISD::FADD) {
12116         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12117         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12118         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12119             (N0.getOperand(0) == N1)) {
12120           return DAG.getNode(ISD::FMUL, DL, VT,
12121                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12122         }
12123       }
12124
12125       if (N1.getOpcode() == ISD::FADD) {
12126         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12127         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12128         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12129             N1.getOperand(0) == N0) {
12130           return DAG.getNode(ISD::FMUL, DL, VT,
12131                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12132         }
12133       }
12134
12135       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12136       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12137           N0.getOperand(0) == N0.getOperand(1) &&
12138           N1.getOperand(0) == N1.getOperand(1) &&
12139           N0.getOperand(0) == N1.getOperand(0)) {
12140         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12141                            DAG.getConstantFP(4.0, DL, VT), Flags);
12142       }
12143     }
12144   } // enable-unsafe-fp-math
12145
12146   // FADD -> FMA combines:
12147   if (SDValue Fused = visitFADDForFMACombine(N)) {
12148     AddToWorklist(Fused.getNode());
12149     return Fused;
12150   }
12151   return SDValue();
12152 }
12153
12154 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12155   SDValue N0 = N->getOperand(0);
12156   SDValue N1 = N->getOperand(1);
12157   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12158   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12159   EVT VT = N->getValueType(0);
12160   SDLoc DL(N);
12161   const TargetOptions &Options = DAG.getTarget().Options;
12162   const SDNodeFlags Flags = N->getFlags();
12163
12164   // fold vector ops
12165   if (VT.isVector())
12166     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12167       return FoldedVOp;
12168
12169   // fold (fsub c1, c2) -> c1-c2
12170   if (N0CFP && N1CFP)
12171     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12172
12173   if (SDValue NewSel = foldBinOpIntoSelect(N))
12174     return NewSel;
12175
12176   // (fsub A, 0) -> A
12177   if (N1CFP && N1CFP->isZero()) {
12178     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
12179         Flags.hasNoSignedZeros()) {
12180       return N0;
12181     }
12182   }
12183
12184   if (N0 == N1) {
12185     // (fsub x, x) -> 0.0
12186     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12187       return DAG.getConstantFP(0.0f, DL, VT);
12188   }
12189
12190   // (fsub -0.0, N1) -> -N1
12191   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12192   //       FSUB does not specify the sign bit of a NaN. Also note that for
12193   //       the same reason, the inverse transform is not safe, unless fast math
12194   //       flags are in play.
12195   if (N0CFP && N0CFP->isZero()) {
12196     if (N0CFP->isNegative() ||
12197         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12198       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
12199         return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12200       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12201         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12202     }
12203   }
12204
12205   if ((Options.UnsafeFPMath ||
12206       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
12207       && N1.getOpcode() == ISD::FADD) {
12208     // X - (X + Y) -> -Y
12209     if (N0 == N1->getOperand(0))
12210       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12211     // X - (Y + X) -> -Y
12212     if (N0 == N1->getOperand(1))
12213       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12214   }
12215
12216   // fold (fsub A, (fneg B)) -> (fadd A, B)
12217   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
12218     return DAG.getNode(ISD::FADD, DL, VT, N0,
12219                        GetNegatedExpression(N1, DAG, LegalOperations,
12220                                             ForCodeSize), Flags);
12221
12222   // FSUB -> FMA combines:
12223   if (SDValue Fused = visitFSUBForFMACombine(N)) {
12224     AddToWorklist(Fused.getNode());
12225     return Fused;
12226   }
12227
12228   return SDValue();
12229 }
12230
12231 SDValue DAGCombiner::visitFMUL(SDNode *N) {
12232   SDValue N0 = N->getOperand(0);
12233   SDValue N1 = N->getOperand(1);
12234   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12235   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12236   EVT VT = N->getValueType(0);
12237   SDLoc DL(N);
12238   const TargetOptions &Options = DAG.getTarget().Options;
12239   const SDNodeFlags Flags = N->getFlags();
12240
12241   // fold vector ops
12242   if (VT.isVector()) {
12243     // This just handles C1 * C2 for vectors. Other vector folds are below.
12244     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12245       return FoldedVOp;
12246   }
12247
12248   // fold (fmul c1, c2) -> c1*c2
12249   if (N0CFP && N1CFP)
12250     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12251
12252   // canonicalize constant to RHS
12253   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12254      !isConstantFPBuildVectorOrConstantFP(N1))
12255     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12256
12257   // fold (fmul A, 1.0) -> A
12258   if (N1CFP && N1CFP->isExactlyValue(1.0))
12259     return N0;
12260
12261   if (SDValue NewSel = foldBinOpIntoSelect(N))
12262     return NewSel;
12263
12264   if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12265       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12266     // fold (fmul A, 0) -> 0
12267     if (N1CFP && N1CFP->isZero())
12268       return N1;
12269   }
12270
12271   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12272     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12273     if (isConstantFPBuildVectorOrConstantFP(N1) &&
12274         N0.getOpcode() == ISD::FMUL) {
12275       SDValue N00 = N0.getOperand(0);
12276       SDValue N01 = N0.getOperand(1);
12277       // Avoid an infinite loop by making sure that N00 is not a constant
12278       // (the inner multiply has not been constant folded yet).
12279       if (isConstantFPBuildVectorOrConstantFP(N01) &&
12280           !isConstantFPBuildVectorOrConstantFP(N00)) {
12281         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12282         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12283       }
12284     }
12285
12286     // Match a special-case: we convert X * 2.0 into fadd.
12287     // fmul (fadd X, X), C -> fmul X, 2.0 * C
12288     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12289         N0.getOperand(0) == N0.getOperand(1)) {
12290       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12291       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12292       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12293     }
12294   }
12295
12296   // fold (fmul X, 2.0) -> (fadd X, X)
12297   if (N1CFP && N1CFP->isExactlyValue(+2.0))
12298     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12299
12300   // fold (fmul X, -1.0) -> (fneg X)
12301   if (N1CFP && N1CFP->isExactlyValue(-1.0))
12302     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12303       return DAG.getNode(ISD::FNEG, DL, VT, N0);
12304
12305   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
12306   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12307                                        ForCodeSize)) {
12308     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12309                                          ForCodeSize)) {
12310       // Both can be negated for free, check to see if at least one is cheaper
12311       // negated.
12312       if (LHSNeg == 2 || RHSNeg == 2)
12313         return DAG.getNode(ISD::FMUL, DL, VT,
12314                            GetNegatedExpression(N0, DAG, LegalOperations,
12315                                                 ForCodeSize),
12316                            GetNegatedExpression(N1, DAG, LegalOperations,
12317                                                 ForCodeSize),
12318                            Flags);
12319     }
12320   }
12321
12322   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12323   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12324   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12325       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12326       TLI.isOperationLegal(ISD::FABS, VT)) {
12327     SDValue Select = N0, X = N1;
12328     if (Select.getOpcode() != ISD::SELECT)
12329       std::swap(Select, X);
12330
12331     SDValue Cond = Select.getOperand(0);
12332     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12333     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12334
12335     if (TrueOpnd && FalseOpnd &&
12336         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12337         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12338         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12339       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12340       switch (CC) {
12341       default: break;
12342       case ISD::SETOLT:
12343       case ISD::SETULT:
12344       case ISD::SETOLE:
12345       case ISD::SETULE:
12346       case ISD::SETLT:
12347       case ISD::SETLE:
12348         std::swap(TrueOpnd, FalseOpnd);
12349         LLVM_FALLTHROUGH;
12350       case ISD::SETOGT:
12351       case ISD::SETUGT:
12352       case ISD::SETOGE:
12353       case ISD::SETUGE:
12354       case ISD::SETGT:
12355       case ISD::SETGE:
12356         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12357             TLI.isOperationLegal(ISD::FNEG, VT))
12358           return DAG.getNode(ISD::FNEG, DL, VT,
12359                    DAG.getNode(ISD::FABS, DL, VT, X));
12360         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12361           return DAG.getNode(ISD::FABS, DL, VT, X);
12362
12363         break;
12364       }
12365     }
12366   }
12367
12368   // FMUL -> FMA combines:
12369   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
12370     AddToWorklist(Fused.getNode());
12371     return Fused;
12372   }
12373
12374   return SDValue();
12375 }
12376
12377 SDValue DAGCombiner::visitFMA(SDNode *N) {
12378   SDValue N0 = N->getOperand(0);
12379   SDValue N1 = N->getOperand(1);
12380   SDValue N2 = N->getOperand(2);
12381   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12382   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12383   EVT VT = N->getValueType(0);
12384   SDLoc DL(N);
12385   const TargetOptions &Options = DAG.getTarget().Options;
12386
12387   // FMA nodes have flags that propagate to the created nodes.
12388   const SDNodeFlags Flags = N->getFlags();
12389   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
12390
12391   // Constant fold FMA.
12392   if (isa<ConstantFPSDNode>(N0) &&
12393       isa<ConstantFPSDNode>(N1) &&
12394       isa<ConstantFPSDNode>(N2)) {
12395     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
12396   }
12397
12398   if (UnsafeFPMath) {
12399     if (N0CFP && N0CFP->isZero())
12400       return N2;
12401     if (N1CFP && N1CFP->isZero())
12402       return N2;
12403   }
12404   // TODO: The FMA node should have flags that propagate to these nodes.
12405   if (N0CFP && N0CFP->isExactlyValue(1.0))
12406     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
12407   if (N1CFP && N1CFP->isExactlyValue(1.0))
12408     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
12409
12410   // Canonicalize (fma c, x, y) -> (fma x, c, y)
12411   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12412      !isConstantFPBuildVectorOrConstantFP(N1))
12413     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
12414
12415   if (UnsafeFPMath) {
12416     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
12417     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
12418         isConstantFPBuildVectorOrConstantFP(N1) &&
12419         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
12420       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12421                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
12422                                      Flags), Flags);
12423     }
12424
12425     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
12426     if (N0.getOpcode() == ISD::FMUL &&
12427         isConstantFPBuildVectorOrConstantFP(N1) &&
12428         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12429       return DAG.getNode(ISD::FMA, DL, VT,
12430                          N0.getOperand(0),
12431                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
12432                                      Flags),
12433                          N2);
12434     }
12435   }
12436
12437   // (fma x, 1, y) -> (fadd x, y)
12438   // (fma x, -1, y) -> (fadd (fneg x), y)
12439   if (N1CFP) {
12440     if (N1CFP->isExactlyValue(1.0))
12441       // TODO: The FMA node should have flags that propagate to this node.
12442       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
12443
12444     if (N1CFP->isExactlyValue(-1.0) &&
12445         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
12446       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
12447       AddToWorklist(RHSNeg.getNode());
12448       // TODO: The FMA node should have flags that propagate to this node.
12449       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
12450     }
12451
12452     // fma (fneg x), K, y -> fma x -K, y
12453     if (N0.getOpcode() == ISD::FNEG &&
12454         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12455          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
12456                                               ForCodeSize)))) {
12457       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
12458                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
12459     }
12460   }
12461
12462   if (UnsafeFPMath) {
12463     // (fma x, c, x) -> (fmul x, (c+1))
12464     if (N1CFP && N0 == N2) {
12465       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12466                          DAG.getNode(ISD::FADD, DL, VT, N1,
12467                                      DAG.getConstantFP(1.0, DL, VT), Flags),
12468                          Flags);
12469     }
12470
12471     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12472     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12473       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12474                          DAG.getNode(ISD::FADD, DL, VT, N1,
12475                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
12476                          Flags);
12477     }
12478   }
12479
12480   return SDValue();
12481 }
12482
12483 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12484 // reciprocal.
12485 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12486 // Notice that this is not always beneficial. One reason is different targets
12487 // may have different costs for FDIV and FMUL, so sometimes the cost of two
12488 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12489 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
12490 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
12491   // TODO: Limit this transform based on optsize/minsize - it always creates at
12492   //       least 1 extra instruction. But the perf win may be substantial enough
12493   //       that only minsize should restrict this.
12494   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12495   const SDNodeFlags Flags = N->getFlags();
12496   if (!UnsafeMath && !Flags.hasAllowReciprocal())
12497     return SDValue();
12498
12499   // Skip if current node is a reciprocal/fneg-reciprocal.
12500   SDValue N0 = N->getOperand(0);
12501   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12502   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
12503     return SDValue();
12504
12505   // Exit early if the target does not want this transform or if there can't
12506   // possibly be enough uses of the divisor to make the transform worthwhile.
12507   SDValue N1 = N->getOperand(1);
12508   unsigned MinUses = TLI.combineRepeatedFPDivisors();
12509
12510   // For splat vectors, scale the number of uses by the splat factor. If we can
12511   // convert the division into a scalar op, that will likely be much faster.
12512   unsigned NumElts = 1;
12513   EVT VT = N->getValueType(0);
12514   if (VT.isVector() && DAG.isSplatValue(N1))
12515     NumElts = VT.getVectorNumElements();
12516
12517   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12518     return SDValue();
12519
12520   // Find all FDIV users of the same divisor.
12521   // Use a set because duplicates may be present in the user list.
12522   SetVector<SDNode *> Users;
12523   for (auto *U : N1->uses()) {
12524     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12525       // This division is eligible for optimization only if global unsafe math
12526       // is enabled or if this division allows reciprocal formation.
12527       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12528         Users.insert(U);
12529     }
12530   }
12531
12532   // Now that we have the actual number of divisor uses, make sure it meets
12533   // the minimum threshold specified by the target.
12534   if ((Users.size() * NumElts) < MinUses)
12535     return SDValue();
12536
12537   SDLoc DL(N);
12538   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
12539   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12540
12541   // Dividend / Divisor -> Dividend * Reciprocal
12542   for (auto *U : Users) {
12543     SDValue Dividend = U->getOperand(0);
12544     if (Dividend != FPOne) {
12545       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12546                                     Reciprocal, Flags);
12547       CombineTo(U, NewNode);
12548     } else if (U != Reciprocal.getNode()) {
12549       // In the absence of fast-math-flags, this user node is always the
12550       // same node as Reciprocal, but with FMF they may be different nodes.
12551       CombineTo(U, Reciprocal);
12552     }
12553   }
12554   return SDValue(N, 0);  // N was replaced.
12555 }
12556
12557 SDValue DAGCombiner::visitFDIV(SDNode *N) {
12558   SDValue N0 = N->getOperand(0);
12559   SDValue N1 = N->getOperand(1);
12560   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12561   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12562   EVT VT = N->getValueType(0);
12563   SDLoc DL(N);
12564   const TargetOptions &Options = DAG.getTarget().Options;
12565   SDNodeFlags Flags = N->getFlags();
12566
12567   // fold vector ops
12568   if (VT.isVector())
12569     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12570       return FoldedVOp;
12571
12572   // fold (fdiv c1, c2) -> c1/c2
12573   if (N0CFP && N1CFP)
12574     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12575
12576   if (SDValue NewSel = foldBinOpIntoSelect(N))
12577     return NewSel;
12578
12579   if (SDValue V = combineRepeatedFPDivisors(N))
12580     return V;
12581
12582   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12583     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12584     if (N1CFP) {
12585       // Compute the reciprocal 1.0 / c2.
12586       const APFloat &N1APF = N1CFP->getValueAPF();
12587       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12588       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12589       // Only do the transform if the reciprocal is a legal fp immediate that
12590       // isn't too nasty (eg NaN, denormal, ...).
12591       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12592           (!LegalOperations ||
12593            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12594            // backend)... we should handle this gracefully after Legalize.
12595            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12596            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12597            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12598         return DAG.getNode(ISD::FMUL, DL, VT, N0,
12599                            DAG.getConstantFP(Recip, DL, VT), Flags);
12600     }
12601
12602     // If this FDIV is part of a reciprocal square root, it may be folded
12603     // into a target-specific square root estimate instruction.
12604     if (N1.getOpcode() == ISD::FSQRT) {
12605       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
12606         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12607       }
12608     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12609                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12610       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12611                                           Flags)) {
12612         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12613         AddToWorklist(RV.getNode());
12614         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12615       }
12616     } else if (N1.getOpcode() == ISD::FP_ROUND &&
12617                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12618       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12619                                           Flags)) {
12620         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12621         AddToWorklist(RV.getNode());
12622         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12623       }
12624     } else if (N1.getOpcode() == ISD::FMUL) {
12625       // Look through an FMUL. Even though this won't remove the FDIV directly,
12626       // it's still worthwhile to get rid of the FSQRT if possible.
12627       SDValue SqrtOp;
12628       SDValue OtherOp;
12629       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12630         SqrtOp = N1.getOperand(0);
12631         OtherOp = N1.getOperand(1);
12632       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12633         SqrtOp = N1.getOperand(1);
12634         OtherOp = N1.getOperand(0);
12635       }
12636       if (SqrtOp.getNode()) {
12637         // We found a FSQRT, so try to make this fold:
12638         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12639         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12640           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12641           AddToWorklist(RV.getNode());
12642           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12643         }
12644       }
12645     }
12646
12647     // Fold into a reciprocal estimate and multiply instead of a real divide.
12648     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
12649       AddToWorklist(RV.getNode());
12650       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12651     }
12652   }
12653
12654   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12655   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12656                                        ForCodeSize)) {
12657     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12658                                          ForCodeSize)) {
12659       // Both can be negated for free, check to see if at least one is cheaper
12660       // negated.
12661       if (LHSNeg == 2 || RHSNeg == 2)
12662         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
12663                            GetNegatedExpression(N0, DAG, LegalOperations,
12664                                                 ForCodeSize),
12665                            GetNegatedExpression(N1, DAG, LegalOperations,
12666                                                 ForCodeSize),
12667                            Flags);
12668     }
12669   }
12670
12671   return SDValue();
12672 }
12673
12674 SDValue DAGCombiner::visitFREM(SDNode *N) {
12675   SDValue N0 = N->getOperand(0);
12676   SDValue N1 = N->getOperand(1);
12677   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12678   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12679   EVT VT = N->getValueType(0);
12680
12681   // fold (frem c1, c2) -> fmod(c1,c2)
12682   if (N0CFP && N1CFP)
12683     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12684
12685   if (SDValue NewSel = foldBinOpIntoSelect(N))
12686     return NewSel;
12687
12688   return SDValue();
12689 }
12690
12691 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12692   SDNodeFlags Flags = N->getFlags();
12693   if (!DAG.getTarget().Options.UnsafeFPMath &&
12694       !Flags.hasApproximateFuncs())
12695     return SDValue();
12696
12697   SDValue N0 = N->getOperand(0);
12698   if (TLI.isFsqrtCheap(N0, DAG))
12699     return SDValue();
12700
12701   // FSQRT nodes have flags that propagate to the created nodes.
12702   return buildSqrtEstimate(N0, Flags);
12703 }
12704
12705 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12706 /// copysign(x, fp_round(y)) -> copysign(x, y)
12707 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12708   SDValue N1 = N->getOperand(1);
12709   if ((N1.getOpcode() == ISD::FP_EXTEND ||
12710        N1.getOpcode() == ISD::FP_ROUND)) {
12711     // Do not optimize out type conversion of f128 type yet.
12712     // For some targets like x86_64, configuration is changed to keep one f128
12713     // value in one SSE register, but instruction selection cannot handle
12714     // FCOPYSIGN on SSE registers yet.
12715     EVT N1VT = N1->getValueType(0);
12716     EVT N1Op0VT = N1->getOperand(0).getValueType();
12717     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12718   }
12719   return false;
12720 }
12721
12722 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12723   SDValue N0 = N->getOperand(0);
12724   SDValue N1 = N->getOperand(1);
12725   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12726   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12727   EVT VT = N->getValueType(0);
12728
12729   if (N0CFP && N1CFP) // Constant fold
12730     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12731
12732   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12733     const APFloat &V = N1C->getValueAPF();
12734     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
12735     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12736     if (!V.isNegative()) {
12737       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12738         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12739     } else {
12740       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12741         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12742                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12743     }
12744   }
12745
12746   // copysign(fabs(x), y) -> copysign(x, y)
12747   // copysign(fneg(x), y) -> copysign(x, y)
12748   // copysign(copysign(x,z), y) -> copysign(x, y)
12749   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12750       N0.getOpcode() == ISD::FCOPYSIGN)
12751     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12752
12753   // copysign(x, abs(y)) -> abs(x)
12754   if (N1.getOpcode() == ISD::FABS)
12755     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12756
12757   // copysign(x, copysign(y,z)) -> copysign(x, z)
12758   if (N1.getOpcode() == ISD::FCOPYSIGN)
12759     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12760
12761   // copysign(x, fp_extend(y)) -> copysign(x, y)
12762   // copysign(x, fp_round(y)) -> copysign(x, y)
12763   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12764     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12765
12766   return SDValue();
12767 }
12768
12769 SDValue DAGCombiner::visitFPOW(SDNode *N) {
12770   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12771   if (!ExponentC)
12772     return SDValue();
12773
12774   // Try to convert x ** (1/3) into cube root.
12775   // TODO: Handle the various flavors of long double.
12776   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12777   //       Some range near 1/3 should be fine.
12778   EVT VT = N->getValueType(0);
12779   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12780       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12781     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12782     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12783     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
12784     // For regular numbers, rounding may cause the results to differ.
12785     // Therefore, we require { nsz ninf nnan afn } for this transform.
12786     // TODO: We could select out the special cases if we don't have nsz/ninf.
12787     SDNodeFlags Flags = N->getFlags();
12788     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12789         !Flags.hasApproximateFuncs())
12790       return SDValue();
12791
12792     // Do not create a cbrt() libcall if the target does not have it, and do not
12793     // turn a pow that has lowering support into a cbrt() libcall.
12794     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12795         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12796          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12797       return SDValue();
12798
12799     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12800   }
12801
12802   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12803   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12804   // TODO: This could be extended (using a target hook) to handle smaller
12805   // power-of-2 fractional exponents.
12806   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12807   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12808   if (ExponentIs025 || ExponentIs075) {
12809     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12810     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
12811     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12812     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
12813     // For regular numbers, rounding may cause the results to differ.
12814     // Therefore, we require { nsz ninf afn } for this transform.
12815     // TODO: We could select out the special cases if we don't have nsz/ninf.
12816     SDNodeFlags Flags = N->getFlags();
12817
12818     // We only need no signed zeros for the 0.25 case.
12819     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12820         !Flags.hasApproximateFuncs())
12821       return SDValue();
12822
12823     // Don't double the number of libcalls. We are trying to inline fast code.
12824     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12825       return SDValue();
12826
12827     // Assume that libcalls are the smallest code.
12828     // TODO: This restriction should probably be lifted for vectors.
12829     if (DAG.getMachineFunction().getFunction().hasOptSize())
12830       return SDValue();
12831
12832     // pow(X, 0.25) --> sqrt(sqrt(X))
12833     SDLoc DL(N);
12834     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12835     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12836     if (ExponentIs025)
12837       return SqrtSqrt;
12838     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12839     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12840   }
12841
12842   return SDValue();
12843 }
12844
12845 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12846                                const TargetLowering &TLI) {
12847   // This optimization is guarded by a function attribute because it may produce
12848   // unexpected results. Ie, programs may be relying on the platform-specific
12849   // undefined behavior when the float-to-int conversion overflows.
12850   const Function &F = DAG.getMachineFunction().getFunction();
12851   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12852   if (StrictOverflow.getValueAsString().equals("false"))
12853     return SDValue();
12854
12855   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12856   // replacing casts with a libcall. We also must be allowed to ignore -0.0
12857   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12858   // conversions would return +0.0.
12859   // FIXME: We should be able to use node-level FMF here.
12860   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12861   EVT VT = N->getValueType(0);
12862   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12863       !DAG.getTarget().Options.NoSignedZerosFPMath)
12864     return SDValue();
12865
12866   // fptosi/fptoui round towards zero, so converting from FP to integer and
12867   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12868   SDValue N0 = N->getOperand(0);
12869   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12870       N0.getOperand(0).getValueType() == VT)
12871     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12872
12873   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12874       N0.getOperand(0).getValueType() == VT)
12875     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12876
12877   return SDValue();
12878 }
12879
12880 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12881   SDValue N0 = N->getOperand(0);
12882   EVT VT = N->getValueType(0);
12883   EVT OpVT = N0.getValueType();
12884
12885   // [us]itofp(undef) = 0, because the result value is bounded.
12886   if (N0.isUndef())
12887     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12888
12889   // fold (sint_to_fp c1) -> c1fp
12890   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12891       // ...but only if the target supports immediate floating-point values
12892       (!LegalOperations ||
12893        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12894     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12895
12896   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12897   // but UINT_TO_FP is legal on this target, try to convert.
12898   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12899       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12900     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12901     if (DAG.SignBitIsZero(N0))
12902       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12903   }
12904
12905   // The next optimizations are desirable only if SELECT_CC can be lowered.
12906   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12907     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12908     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12909         !VT.isVector() &&
12910         (!LegalOperations ||
12911          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12912       SDLoc DL(N);
12913       SDValue Ops[] =
12914         { N0.getOperand(0), N0.getOperand(1),
12915           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12916           N0.getOperand(2) };
12917       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12918     }
12919
12920     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12921     //      (select_cc x, y, 1.0, 0.0,, cc)
12922     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12923         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12924         (!LegalOperations ||
12925          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12926       SDLoc DL(N);
12927       SDValue Ops[] =
12928         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12929           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12930           N0.getOperand(0).getOperand(2) };
12931       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12932     }
12933   }
12934
12935   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12936     return FTrunc;
12937
12938   return SDValue();
12939 }
12940
12941 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12942   SDValue N0 = N->getOperand(0);
12943   EVT VT = N->getValueType(0);
12944   EVT OpVT = N0.getValueType();
12945
12946   // [us]itofp(undef) = 0, because the result value is bounded.
12947   if (N0.isUndef())
12948     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12949
12950   // fold (uint_to_fp c1) -> c1fp
12951   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12952       // ...but only if the target supports immediate floating-point values
12953       (!LegalOperations ||
12954        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12955     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12956
12957   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12958   // but SINT_TO_FP is legal on this target, try to convert.
12959   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12960       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12961     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12962     if (DAG.SignBitIsZero(N0))
12963       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12964   }
12965
12966   // The next optimizations are desirable only if SELECT_CC can be lowered.
12967   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12968     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12969     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12970         (!LegalOperations ||
12971          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12972       SDLoc DL(N);
12973       SDValue Ops[] =
12974         { N0.getOperand(0), N0.getOperand(1),
12975           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12976           N0.getOperand(2) };
12977       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12978     }
12979   }
12980
12981   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12982     return FTrunc;
12983
12984   return SDValue();
12985 }
12986
12987 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12988 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12989   SDValue N0 = N->getOperand(0);
12990   EVT VT = N->getValueType(0);
12991
12992   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12993     return SDValue();
12994
12995   SDValue Src = N0.getOperand(0);
12996   EVT SrcVT = Src.getValueType();
12997   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12998   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12999
13000   // We can safely assume the conversion won't overflow the output range,
13001   // because (for example) (uint8_t)18293.f is undefined behavior.
13002
13003   // Since we can assume the conversion won't overflow, our decision as to
13004   // whether the input will fit in the float should depend on the minimum
13005   // of the input range and output range.
13006
13007   // This means this is also safe for a signed input and unsigned output, since
13008   // a negative input would lead to undefined behavior.
13009   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13010   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13011   unsigned ActualSize = std::min(InputSize, OutputSize);
13012   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13013
13014   // We can only fold away the float conversion if the input range can be
13015   // represented exactly in the float range.
13016   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
13017     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
13018       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
13019                                                        : ISD::ZERO_EXTEND;
13020       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13021     }
13022     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13023       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
13024     return DAG.getBitcast(VT, Src);
13025   }
13026   return SDValue();
13027 }
13028
13029 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13030   SDValue N0 = N->getOperand(0);
13031   EVT VT = N->getValueType(0);
13032
13033   // fold (fp_to_sint undef) -> undef
13034   if (N0.isUndef())
13035     return DAG.getUNDEF(VT);
13036
13037   // fold (fp_to_sint c1fp) -> c1
13038   if (isConstantFPBuildVectorOrConstantFP(N0))
13039     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13040
13041   return FoldIntToFPToInt(N, DAG);
13042 }
13043
13044 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13045   SDValue N0 = N->getOperand(0);
13046   EVT VT = N->getValueType(0);
13047
13048   // fold (fp_to_uint undef) -> undef
13049   if (N0.isUndef())
13050     return DAG.getUNDEF(VT);
13051
13052   // fold (fp_to_uint c1fp) -> c1
13053   if (isConstantFPBuildVectorOrConstantFP(N0))
13054     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13055
13056   return FoldIntToFPToInt(N, DAG);
13057 }
13058
13059 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
13060   SDValue N0 = N->getOperand(0);
13061   SDValue N1 = N->getOperand(1);
13062   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13063   EVT VT = N->getValueType(0);
13064
13065   // fold (fp_round c1fp) -> c1fp
13066   if (N0CFP)
13067     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13068
13069   // fold (fp_round (fp_extend x)) -> x
13070   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13071     return N0.getOperand(0);
13072
13073   // fold (fp_round (fp_round x)) -> (fp_round x)
13074   if (N0.getOpcode() == ISD::FP_ROUND) {
13075     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13076     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13077
13078     // Skip this folding if it results in an fp_round from f80 to f16.
13079     //
13080     // f80 to f16 always generates an expensive (and as yet, unimplemented)
13081     // libcall to __truncxfhf2 instead of selecting native f16 conversion
13082     // instructions from f32 or f64.  Moreover, the first (value-preserving)
13083     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13084     // x86.
13085     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13086       return SDValue();
13087
13088     // If the first fp_round isn't a value preserving truncation, it might
13089     // introduce a tie in the second fp_round, that wouldn't occur in the
13090     // single-step fp_round we want to fold to.
13091     // In other words, double rounding isn't the same as rounding.
13092     // Also, this is a value preserving truncation iff both fp_round's are.
13093     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13094       SDLoc DL(N);
13095       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13096                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13097     }
13098   }
13099
13100   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13101   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13102     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13103                               N0.getOperand(0), N1);
13104     AddToWorklist(Tmp.getNode());
13105     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13106                        Tmp, N0.getOperand(1));
13107   }
13108
13109   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13110     return NewVSel;
13111
13112   return SDValue();
13113 }
13114
13115 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
13116   SDValue N0 = N->getOperand(0);
13117   EVT VT = N->getValueType(0);
13118   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13119   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13120
13121   // fold (fp_round_inreg c1fp) -> c1fp
13122   if (N0CFP && isTypeLegal(EVT)) {
13123     SDLoc DL(N);
13124     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
13125     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
13126   }
13127
13128   return SDValue();
13129 }
13130
13131 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13132   SDValue N0 = N->getOperand(0);
13133   EVT VT = N->getValueType(0);
13134
13135   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
13136   if (N->hasOneUse() &&
13137       N->use_begin()->getOpcode() == ISD::FP_ROUND)
13138     return SDValue();
13139
13140   // fold (fp_extend c1fp) -> c1fp
13141   if (isConstantFPBuildVectorOrConstantFP(N0))
13142     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13143
13144   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13145   if (N0.getOpcode() == ISD::FP16_TO_FP &&
13146       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13147     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13148
13149   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
13150   // value of X.
13151   if (N0.getOpcode() == ISD::FP_ROUND
13152       && N0.getConstantOperandVal(1) == 1) {
13153     SDValue In = N0.getOperand(0);
13154     if (In.getValueType() == VT) return In;
13155     if (VT.bitsLT(In.getValueType()))
13156       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13157                          In, N0.getOperand(1));
13158     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13159   }
13160
13161   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
13162   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13163        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13164     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13165     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13166                                      LN0->getChain(),
13167                                      LN0->getBasePtr(), N0.getValueType(),
13168                                      LN0->getMemOperand());
13169     CombineTo(N, ExtLoad);
13170     CombineTo(N0.getNode(),
13171               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13172                           N0.getValueType(), ExtLoad,
13173                           DAG.getIntPtrConstant(1, SDLoc(N0))),
13174               ExtLoad.getValue(1));
13175     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13176   }
13177
13178   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13179     return NewVSel;
13180
13181   return SDValue();
13182 }
13183
13184 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13185   SDValue N0 = N->getOperand(0);
13186   EVT VT = N->getValueType(0);
13187
13188   // fold (fceil c1) -> fceil(c1)
13189   if (isConstantFPBuildVectorOrConstantFP(N0))
13190     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13191
13192   return SDValue();
13193 }
13194
13195 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13196   SDValue N0 = N->getOperand(0);
13197   EVT VT = N->getValueType(0);
13198
13199   // fold (ftrunc c1) -> ftrunc(c1)
13200   if (isConstantFPBuildVectorOrConstantFP(N0))
13201     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13202
13203   // fold ftrunc (known rounded int x) -> x
13204   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13205   // likely to be generated to extract integer from a rounded floating value.
13206   switch (N0.getOpcode()) {
13207   default: break;
13208   case ISD::FRINT:
13209   case ISD::FTRUNC:
13210   case ISD::FNEARBYINT:
13211   case ISD::FFLOOR:
13212   case ISD::FCEIL:
13213     return N0;
13214   }
13215
13216   return SDValue();
13217 }
13218
13219 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13220   SDValue N0 = N->getOperand(0);
13221   EVT VT = N->getValueType(0);
13222
13223   // fold (ffloor c1) -> ffloor(c1)
13224   if (isConstantFPBuildVectorOrConstantFP(N0))
13225     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13226
13227   return SDValue();
13228 }
13229
13230 // FIXME: FNEG and FABS have a lot in common; refactor.
13231 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13232   SDValue N0 = N->getOperand(0);
13233   EVT VT = N->getValueType(0);
13234
13235   // Constant fold FNEG.
13236   if (isConstantFPBuildVectorOrConstantFP(N0))
13237     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13238
13239   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
13240                          &DAG.getTarget().Options, ForCodeSize))
13241     return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
13242
13243   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13244   // constant pool values.
13245   if (!TLI.isFNegFree(VT) &&
13246       N0.getOpcode() == ISD::BITCAST &&
13247       N0.getNode()->hasOneUse()) {
13248     SDValue Int = N0.getOperand(0);
13249     EVT IntVT = Int.getValueType();
13250     if (IntVT.isInteger() && !IntVT.isVector()) {
13251       APInt SignMask;
13252       if (N0.getValueType().isVector()) {
13253         // For a vector, get a mask such as 0x80... per scalar element
13254         // and splat it.
13255         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13256         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13257       } else {
13258         // For a scalar, just generate 0x80...
13259         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13260       }
13261       SDLoc DL0(N0);
13262       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13263                         DAG.getConstant(SignMask, DL0, IntVT));
13264       AddToWorklist(Int.getNode());
13265       return DAG.getBitcast(VT, Int);
13266     }
13267   }
13268
13269   // (fneg (fmul c, x)) -> (fmul -c, x)
13270   if (N0.getOpcode() == ISD::FMUL &&
13271       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13272     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13273     if (CFP1) {
13274       APFloat CVal = CFP1->getValueAPF();
13275       CVal.changeSign();
13276       if (Level >= AfterLegalizeDAG &&
13277           (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13278            TLI.isOperationLegal(ISD::ConstantFP, VT)))
13279         return DAG.getNode(
13280             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13281             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13282             N0->getFlags());
13283     }
13284   }
13285
13286   return SDValue();
13287 }
13288
13289 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13290                             APFloat (*Op)(const APFloat &, const APFloat &)) {
13291   SDValue N0 = N->getOperand(0);
13292   SDValue N1 = N->getOperand(1);
13293   EVT VT = N->getValueType(0);
13294   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13295   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13296
13297   if (N0CFP && N1CFP) {
13298     const APFloat &C0 = N0CFP->getValueAPF();
13299     const APFloat &C1 = N1CFP->getValueAPF();
13300     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13301   }
13302
13303   // Canonicalize to constant on RHS.
13304   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13305       !isConstantFPBuildVectorOrConstantFP(N1))
13306     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13307
13308   return SDValue();
13309 }
13310
13311 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
        // Delegate to the shared min/max combine; `minnum` is the callback it
        // uses to evaluate the node when both operands are FP constants.
13312   return visitFMinMax(DAG, N, minnum);
13313 }
13314
13315 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
        // Delegate to the shared min/max combine; `maxnum` is the callback it
        // uses to evaluate the node when both operands are FP constants.
13316   return visitFMinMax(DAG, N, maxnum);
13317 }
13318
13319 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
        // Delegate to the shared min/max combine; `minimum` is the callback it
        // uses to evaluate the node when both operands are FP constants.
13320   return visitFMinMax(DAG, N, minimum);
13321 }
13322
13323 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
        // Delegate to the shared min/max combine; `maximum` is the callback it
        // uses to evaluate the node when both operands are FP constants.
13324   return visitFMinMax(DAG, N, maximum);
13325 }
13326
13327 SDValue DAGCombiner::visitFABS(SDNode *N) {
13328   SDValue N0 = N->getOperand(0);
13329   EVT VT = N->getValueType(0);
13330
13331   // fold (fabs c1) -> fabs(c1)
13332   if (isConstantFPBuildVectorOrConstantFP(N0))
13333     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13334
13335   // fold (fabs (fabs x)) -> (fabs x)
13336   if (N0.getOpcode() == ISD::FABS)
13337     return N->getOperand(0);
13338
13339   // fold (fabs (fneg x)) -> (fabs x)
13340   // fold (fabs (fcopysign x, y)) -> (fabs x)
13341   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13342     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13343
13344   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13345   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13346     SDValue Int = N0.getOperand(0);
13347     EVT IntVT = Int.getValueType();
13348     if (IntVT.isInteger() && !IntVT.isVector()) {
13349       APInt SignMask;
13350       if (N0.getValueType().isVector()) {
13351         // For a vector, get a mask such as 0x7f... per scalar element
13352         // and splat it.
13353         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
13354         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13355       } else {
13356         // For a scalar, just generate 0x7f...
13357         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
13358       }
13359       SDLoc DL(N0);
13360       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
13361                         DAG.getConstant(SignMask, DL, IntVT));
13362       AddToWorklist(Int.getNode());
13363       return DAG.getBitcast(N->getValueType(0), Int);
13364     }
13365   }
13366
13367   return SDValue();
13368 }
13369
13370 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
13371   SDValue Chain = N->getOperand(0);
13372   SDValue N1 = N->getOperand(1);
13373   SDValue N2 = N->getOperand(2);
13374
13375   // If N is a constant we could fold this into a fallthrough or unconditional
13376   // branch. However that doesn't happen very often in normal code, because
13377   // Instcombine/SimplifyCFG should have handled the available opportunities.
13378   // If we did this folding here, it would be necessary to update the
13379   // MachineBasicBlock CFG, which is awkward.
13380
13381   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
13382   // on the target.
13383   if (N1.getOpcode() == ISD::SETCC &&
13384       TLI.isOperationLegalOrCustom(ISD::BR_CC,
13385                                    N1.getOperand(0).getValueType())) {
13386     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13387                        Chain, N1.getOperand(2),
13388                        N1.getOperand(0), N1.getOperand(1), N2);
13389   }
13390
13391   if (N1.hasOneUse()) {
13392     if (SDValue NewN1 = rebuildSetCC(N1))
13393       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
13394   }
13395
13396   return SDValue();
13397 }
13398
13399 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
13400   if (N.getOpcode() == ISD::SRL ||
13401       (N.getOpcode() == ISD::TRUNCATE &&
13402        (N.getOperand(0).hasOneUse() &&
13403         N.getOperand(0).getOpcode() == ISD::SRL))) {
13404     // Look pass the truncate.
13405     if (N.getOpcode() == ISD::TRUNCATE)
13406       N = N.getOperand(0);
13407
13408     // Match this pattern so that we can generate simpler code:
13409     //
13410     //   %a = ...
13411     //   %b = and i32 %a, 2
13412     //   %c = srl i32 %b, 1
13413     //   brcond i32 %c ...
13414     //
13415     // into
13416     //
13417     //   %a = ...
13418     //   %b = and i32 %a, 2
13419     //   %c = setcc eq %b, 0
13420     //   brcond %c ...
13421     //
13422     // This applies only when the AND constant value has one bit set and the
13423     // SRL constant is equal to the log2 of the AND constant. The back-end is
13424     // smart enough to convert the result into a TEST/JMP sequence.
13425     SDValue Op0 = N.getOperand(0);
13426     SDValue Op1 = N.getOperand(1);
13427
13428     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13429       SDValue AndOp1 = Op0.getOperand(1);
13430
13431       if (AndOp1.getOpcode() == ISD::Constant) {
13432         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13433
13434         if (AndConst.isPowerOf2() &&
13435             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13436           SDLoc DL(N);
13437           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13438                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13439                               ISD::SETNE);
13440         }
13441       }
13442     }
13443   }
13444
13445   // Transform br(xor(x, y)) -> br(x != y)
13446   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13447   if (N.getOpcode() == ISD::XOR) {
13448     // Because we may call this on a speculatively constructed
13449     // SimplifiedSetCC Node, we need to simplify this node first.
13450     // Ideally this should be folded into SimplifySetCC and not
13451     // here. For now, grab a handle to N so we don't lose it from
13452     // replacements interal to the visit.
13453     HandleSDNode XORHandle(N);
13454     while (N.getOpcode() == ISD::XOR) {
13455       SDValue Tmp = visitXOR(N.getNode());
13456       // No simplification done.
13457       if (!Tmp.getNode())
13458         break;
13459       // Returning N is form in-visit replacement that may invalidated
13460       // N. Grab value from Handle.
13461       if (Tmp.getNode() == N.getNode())
13462         N = XORHandle.getValue();
13463       else // Node simplified. Try simplifying again.
13464         N = Tmp;
13465     }
13466
13467     if (N.getOpcode() != ISD::XOR)
13468       return N;
13469
13470     SDNode *TheXor = N.getNode();
13471
13472     SDValue Op0 = TheXor->getOperand(0);
13473     SDValue Op1 = TheXor->getOperand(1);
13474
13475     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13476       bool Equal = false;
13477       if (isOneConstant(Op0) && Op0.hasOneUse() &&
13478           Op0.getOpcode() == ISD::XOR) {
13479         TheXor = Op0.getNode();
13480         Equal = true;
13481       }
13482
13483       EVT SetCCVT = N.getValueType();
13484       if (LegalTypes)
13485         SetCCVT = getSetCCResultType(SetCCVT);
13486       // Replace the uses of XOR with SETCC
13487       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13488                           Equal ? ISD::SETEQ : ISD::SETNE);
13489     }
13490   }
13491
13492   return SDValue();
13493 }
13494
13495 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13496 //
13497 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13498   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13499   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13500
13501   // If N is a constant we could fold this into a fallthrough or unconditional
13502   // branch. However that doesn't happen very often in normal code, because
13503   // Instcombine/SimplifyCFG should have handled the available opportunities.
13504   // If we did this folding here, it would be necessary to update the
13505   // MachineBasicBlock CFG, which is awkward.
13506
13507   // Use SimplifySetCC to simplify SETCC's.
13508   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13509                                CondLHS, CondRHS, CC->get(), SDLoc(N),
13510                                false);
13511   if (Simp.getNode()) AddToWorklist(Simp.getNode());
13512
13513   // fold to a simpler setcc
13514   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13515     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13516                        N->getOperand(0), Simp.getOperand(2),
13517                        Simp.getOperand(0), Simp.getOperand(1),
13518                        N->getOperand(4));
13519
13520   return SDValue();
13521 }
13522
13523 /// Return true if 'Use' is a load or a store that uses N as its base pointer
13524 /// and that N may be folded in the load / store addressing mode.
13525 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13526                                     SelectionDAG &DAG,
13527                                     const TargetLowering &TLI) {
13528   EVT VT;
13529   unsigned AS;
13530
13531   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
13532     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13533       return false;
13534     VT = LD->getMemoryVT();
13535     AS = LD->getAddressSpace();
13536   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
13537     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13538       return false;
13539     VT = ST->getMemoryVT();
13540     AS = ST->getAddressSpace();
13541   } else
13542     return false;
13543
13544   TargetLowering::AddrMode AM;
13545   if (N->getOpcode() == ISD::ADD) {
13546     AM.HasBaseReg = true;
13547     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13548     if (Offset)
13549       // [reg +/- imm]
13550       AM.BaseOffs = Offset->getSExtValue();
13551     else
13552       // [reg +/- reg]
13553       AM.Scale = 1;
13554   } else if (N->getOpcode() == ISD::SUB) {
13555     AM.HasBaseReg = true;
13556     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13557     if (Offset)
13558       // [reg +/- imm]
13559       AM.BaseOffs = -Offset->getSExtValue();
13560     else
13561       // [reg +/- reg]
13562       AM.Scale = 1;
13563   } else
13564     return false;
13565
13566   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13567                                    VT.getTypeForEVT(*DAG.getContext()), AS);
13568 }
13569
13570 /// Try turning a load/store into a pre-indexed load/store when the base
13571 /// pointer is an add or subtract and it has other uses besides the load/store.
13572 /// After the transformation, the new indexed load/store has effectively folded
13573 /// the add/subtract in and all of its other uses are redirected to the
13574 /// new load/store.
      ///
      /// Returns true if N was replaced by an indexed load/store, and false if
      /// any of the legality or profitability checks below rejected the
      /// transformation (in which case nothing is changed).
13575 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13576   if (Level < AfterLegalizeDAG)
13577     return false;
13578
        // Only unindexed loads/stores qualify, and only if the target supports a
        // pre-increment or pre-decrement form for the memory type.
13579   bool isLoad = true;
13580   SDValue Ptr;
13581   EVT VT;
13582   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13583     if (LD->isIndexed())
13584       return false;
13585     VT = LD->getMemoryVT();
13586     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13587         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13588       return false;
13589     Ptr = LD->getBasePtr();
13590   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13591     if (ST->isIndexed())
13592       return false;
13593     VT = ST->getMemoryVT();
13594     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13595         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13596       return false;
13597     Ptr = ST->getBasePtr();
13598     isLoad = false;
13599   } else {
13600     return false;
13601   }
13602
13603   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13604   // out.  There is no reason to make this a preinc/predec.
13605   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13606       Ptr.getNode()->hasOneUse())
13607     return false;
13608
13609   // Ask the target to do addressing mode selection.
13610   SDValue BasePtr;
13611   SDValue Offset;
13612   ISD::MemIndexedMode AM = ISD::UNINDEXED;
13613   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13614     return false;
13615
13616   // Backends without true r+i pre-indexed forms may need to pass a
13617   // constant base with a variable offset so that constant coercion
13618   // will work with the patterns in canonical form.
13619   bool Swapped = false;
13620   if (isa<ConstantSDNode>(BasePtr)) {
13621     std::swap(BasePtr, Offset);
13622     Swapped = true;
13623   }
13624
13625   // Don't create an indexed load / store with zero offset.
13626   if (isNullConstant(Offset))
13627     return false;
13628
13629   // Try turning it into a pre-indexed load / store except when:
13630   // 1) The new base ptr is a frame index.
13631   // 2) If N is a store and the new base ptr is either the same as or is a
13632   //    predecessor of the value being stored.
13633   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13634   //    that would create a cycle.
13635   // 4) All uses are load / store ops that use it as old base ptr.
13636
13637   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
13638   // (plus the implicit offset) to a register to preinc anyway.
13639   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13640     return false;
13641
13642   // Check #2.
13643   if (!isLoad) {
13644     SDValue Val = cast<StoreSDNode>(N)->getValue();
13645
13646     // Would require a copy.
13647     if (Val == BasePtr)
13648       return false;
13649
13650     // Would create a cycle.
13651     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13652       return false;
13653   }
13654
13655   // Caches for hasPredecessorHelper.
13656   SmallPtrSet<const SDNode *, 32> Visited;
13657   SmallVector<const SDNode *, 16> Worklist;
13658   Worklist.push_back(N);
13659
13660   // If the offset is a constant, there may be other adds of constants that
13661   // can be folded with this one. We should do this to avoid having to keep
13662   // a copy of the original base pointer.
        // OtherUses collects those ADD/SUB users of BasePtr; it is emptied again
        // as soon as one user turns out not to be rewritable.
13663   SmallVector<SDNode *, 16> OtherUses;
13664   if (isa<ConstantSDNode>(Offset))
13665     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13666                               UE = BasePtr.getNode()->use_end();
13667          UI != UE; ++UI) {
13668       SDUse &Use = UI.getUse();
13669       // Skip the use that is Ptr and uses of other results from BasePtr's
13670       // node (important for nodes that return multiple results).
13671       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13672         continue;
13673
13674       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13675         continue;
13676
13677       if (Use.getUser()->getOpcode() != ISD::ADD &&
13678           Use.getUser()->getOpcode() != ISD::SUB) {
13679         OtherUses.clear();
13680         break;
13681       }
13682
              // Op1 is the user's other operand (the one that is not this use of
              // BasePtr); it has to be a constant of the same type as Offset.
13683       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13684       if (!isa<ConstantSDNode>(Op1)) {
13685         OtherUses.clear();
13686         break;
13687       }
13688
13689       // FIXME: In some cases, we can be smarter about this.
13690       if (Op1.getValueType() != Offset.getValueType()) {
13691         OtherUses.clear();
13692         break;
13693       }
13694
13695       OtherUses.push_back(Use.getUser());
13696     }
13697
        // Undo the earlier swap so BasePtr/Offset are handed to the indexed node
        // in the order the target produced them.
13698   if (Swapped)
13699     std::swap(BasePtr, Offset);
13700
13701   // Now check for #3 and #4.
13702   bool RealUse = false;
13703
13704   for (SDNode *Use : Ptr.getNode()->uses()) {
13705     if (Use == N)
13706       continue;
13707     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13708       return false;
13709
13710     // If Ptr may be folded in addressing mode of other use, then it's
13711     // not profitable to do this transformation.
13712     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13713       RealUse = true;
13714   }
13715
13716   if (!RealUse)
13717     return false;
13718
13719   SDValue Result;
13720   if (isLoad)
13721     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13722                                 BasePtr, Offset, AM);
13723   else
13724     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13725                                  BasePtr, Offset, AM);
13726   ++PreIndexedNodes;
13727   ++NodesCombined;
13728   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13729              Result.getNode()->dump(&DAG); dbgs() << '\n');
13730   WorklistRemover DeadNodes(*this);
        // Redirect N's results to the indexed node. For a load, results 0 and 1
        // of N map to results 0 and 2 of the new node; for a store, result 0 of N
        // maps to result 1. The remaining result of the new node (1 for a load,
        // 0 for a store) is what replaces Ptr further down.
13731   if (isLoad) {
13732     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13733     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13734   } else {
13735     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13736   }
13737
13738   // Finally, since the node is now dead, remove it from the graph.
13739   deleteAndRecombine(N);
13740
        // Swap once more so that, in the swapped case, Offset again names the
        // constant operand that the rewrite loop below casts to ConstantSDNode.
13741   if (Swapped)
13742     std::swap(BasePtr, Offset);
13743
13744   // Replace other uses of BasePtr that can be updated to use Ptr
13745   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
          // OffsetIdx is the operand index of the constant in this user; the
          // other operand is BasePtr.
13746     unsigned OffsetIdx = 1;
13747     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13748       OffsetIdx = 0;
13749     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13750            BasePtr.getNode() && "Expected BasePtr operand");
13751
13752     // We need to replace ptr0 in the following expression:
13753     //   x0 * offset0 + y0 * ptr0 = t0
13754     // knowing that
13755     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13756     //
13757     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13758     // indexed load/store and the expression that needs to be re-written.
13759     //
13760     // Therefore, we have:
13761     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
13762
13763     ConstantSDNode *CN =
13764       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13765     int X0, X1, Y0, Y1;
13766     const APInt &Offset0 = CN->getAPIntValue();
13767     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13768
13769     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13770     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13771     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13772     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13773
13774     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13775
          // CNV is the new constant term: x0 * offset0 - x1 * y0 * y1 * offset1.
13776     APInt CNV = Offset0;
13777     if (X0 < 0) CNV = -CNV;
13778     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13779     else CNV = CNV - Offset1;
13780
13781     SDLoc DL(OtherUses[i]);
13782
13783     // We can now generate the new expression.
13784     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13785     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13786
13787     SDValue NewUse = DAG.getNode(Opcode,
13788                                  DL,
13789                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13790     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13791     deleteAndRecombine(OtherUses[i]);
13792   }
13793
13794   // Replace the uses of Ptr with uses of the updated base value.
13795   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13796   deleteAndRecombine(Ptr.getNode());
13797   AddToWorklist(Result.getNode());
13798
13799   return true;
13800 }
13801
13802 /// Try to combine a load/store with a add/sub of the base pointer node into a
13803 /// post-indexed load/store. The transformation folded the add/subtract into the
13804 /// new indexed load/store effectively and all of its uses are redirected to the
13805 /// new load/store.
13806 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13807   if (Level < AfterLegalizeDAG)
13808     return false;
13809
13810   bool isLoad = true;
13811   SDValue Ptr;
13812   EVT VT;
13813   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13814     if (LD->isIndexed())
13815       return false;
13816     VT = LD->getMemoryVT();
13817     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13818         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13819       return false;
13820     Ptr = LD->getBasePtr();
13821   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13822     if (ST->isIndexed())
13823       return false;
13824     VT = ST->getMemoryVT();
13825     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13826         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13827       return false;
13828     Ptr = ST->getBasePtr();
13829     isLoad = false;
13830   } else {
13831     return false;
13832   }
13833
13834   if (Ptr.getNode()->hasOneUse())
13835     return false;
13836
13837   for (SDNode *Op : Ptr.getNode()->uses()) {
13838     if (Op == N ||
13839         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13840       continue;
13841
13842     SDValue BasePtr;
13843     SDValue Offset;
13844     ISD::MemIndexedMode AM = ISD::UNINDEXED;
13845     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13846       // Don't create a indexed load / store with zero offset.
13847       if (isNullConstant(Offset))
13848         continue;
13849
13850       // Try turning it into a post-indexed load / store except when
13851       // 1) All uses are load / store ops that use it as base ptr (and
13852       //    it may be folded as addressing mmode).
13853       // 2) Op must be independent of N, i.e. Op is neither a predecessor
13854       //    nor a successor of N. Otherwise, if Op is folded that would
13855       //    create a cycle.
13856
13857       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13858         continue;
13859
13860       // Check for #1.
13861       bool TryNext = false;
13862       for (SDNode *Use : BasePtr.getNode()->uses()) {
13863         if (Use == Ptr.getNode())
13864           continue;
13865
13866         // If all the uses are load / store addresses, then don't do the
13867         // transformation.
13868         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13869           bool RealUse = false;
13870           for (SDNode *UseUse : Use->uses()) {
13871             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13872               RealUse = true;
13873           }
13874
13875           if (!RealUse) {
13876             TryNext = true;
13877             break;
13878           }
13879         }
13880       }
13881
13882       if (TryNext)
13883         continue;
13884
13885       // Check for #2.
13886       SmallPtrSet<const SDNode *, 32> Visited;
13887       SmallVector<const SDNode *, 8> Worklist;
13888       // Ptr is predecessor to both N and Op.
13889       Visited.insert(Ptr.getNode());
13890       Worklist.push_back(N);
13891       Worklist.push_back(Op);
13892       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13893           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13894         SDValue Result = isLoad
13895           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13896                                BasePtr, Offset, AM)
13897           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13898                                 BasePtr, Offset, AM);
13899         ++PostIndexedNodes;
13900         ++NodesCombined;
13901         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13902                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13903                    dbgs() << '\n');
13904         WorklistRemover DeadNodes(*this);
13905         if (isLoad) {
13906           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13907           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13908         } else {
13909           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13910         }
13911
13912         // Finally, since the node is now dead, remove it from the graph.
13913         deleteAndRecombine(N);
13914
13915         // Replace the uses of Use with uses of the updated base value.
13916         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13917                                       Result.getValue(isLoad ? 1 : 0));
13918         deleteAndRecombine(Op);
13919         return true;
13920       }
13921     }
13922   }
13923
13924   return false;
13925 }
13926
13927 /// Return the base-pointer arithmetic from an indexed \p LD.
13928 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13929   ISD::MemIndexedMode AM = LD->getAddressingMode();
13930   assert(AM != ISD::UNINDEXED);
13931   SDValue BP = LD->getOperand(1);
13932   SDValue Inc = LD->getOperand(2);
13933
13934   // Some backends use TargetConstants for load offsets, but don't expect
13935   // TargetConstants in general ADD nodes. We can convert these constants into
13936   // regular Constants (if the constant is not opaque).
13937   assert((Inc.getOpcode() != ISD::TargetConstant ||
13938           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13939          "Cannot split out indexing using opaque target constants");
13940   if (Inc.getOpcode() == ISD::TargetConstant) {
13941     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13942     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13943                           ConstInc->getValueType(0));
13944   }
13945
13946   unsigned Opc =
13947       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13948   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13949 }
13950
13951 static inline int numVectorEltsOrZero(EVT T) {
13952   return T.isVector() ? T.getVectorNumElements() : 0;
13953 }
13954
13955 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13956   Val = ST->getValue();
13957   EVT STType = Val.getValueType();
13958   EVT STMemType = ST->getMemoryVT();
13959   if (STType == STMemType)
13960     return true;
13961   if (isTypeLegal(STMemType))
13962     return false; // fail.
13963   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13964       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13965     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13966     return true;
13967   }
13968   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13969       STType.isInteger() && STMemType.isInteger()) {
13970     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13971     return true;
13972   }
13973   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13974     Val = DAG.getBitcast(STMemType, Val);
13975     return true;
13976   }
13977   return false; // fail.
13978 }
13979
13980 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13981   EVT LDMemType = LD->getMemoryVT();
13982   EVT LDType = LD->getValueType(0);
13983   assert(Val.getValueType() == LDMemType &&
13984          "Attempting to extend value of non-matching type");
13985   if (LDType == LDMemType)
13986     return true;
13987   if (LDMemType.isInteger() && LDType.isInteger()) {
13988     switch (LD->getExtensionType()) {
13989     case ISD::NON_EXTLOAD:
13990       Val = DAG.getBitcast(LDType, Val);
13991       return true;
13992     case ISD::EXTLOAD:
13993       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13994       return true;
13995     case ISD::SEXTLOAD:
13996       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13997       return true;
13998     case ISD::ZEXTLOAD:
13999       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14000       return true;
14001     }
14002   }
14003   return false;
14004 }
14005
14006 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14007   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
14008     return SDValue();
14009   SDValue Chain = LD->getOperand(0);
14010   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14011   if (!ST || ST->isVolatile())
14012     return SDValue();
14013
14014   EVT LDType = LD->getValueType(0);
14015   EVT LDMemType = LD->getMemoryVT();
14016   EVT STMemType = ST->getMemoryVT();
14017   EVT STType = ST->getValue().getValueType();
14018
14019   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14020   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14021   int64_t Offset;
14022   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14023     return SDValue();
14024
14025   // Normalize for Endianness. After this Offset=0 will denote that the least
14026   // significant bit in the loaded value maps to the least significant bit in
14027   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
14028   // n:th least significant byte of the stored value.
14029   if (DAG.getDataLayout().isBigEndian())
14030     Offset = (STMemType.getStoreSizeInBits() -
14031               LDMemType.getStoreSizeInBits()) / 8 - Offset;
14032
14033   // Check that the stored value cover all bits that are loaded.
14034   bool STCoversLD =
14035       (Offset >= 0) &&
14036       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
14037
14038   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14039     if (LD->isIndexed()) {
14040       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
14041                     LD->getAddressingMode() == ISD::POST_DEC);
14042       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
14043       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
14044                              LD->getOperand(1), LD->getOperand(2));
14045       SDValue Ops[] = {Val, Idx, Chain};
14046       return CombineTo(LD, Ops, 3);
14047     }
14048     return CombineTo(LD, Val, Chain);
14049   };
14050
14051   if (!STCoversLD)
14052     return SDValue();
14053
14054   // Memory as copy space (potentially masked).
14055   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14056     // Simple case: Direct non-truncating forwarding
14057     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14058       return ReplaceLd(LD, ST->getValue(), Chain);
14059     // Can we model the truncate and extension with an and mask?
14060     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14061         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14062       // Mask to size of LDMemType
14063       auto Mask =
14064           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
14065                                                STMemType.getSizeInBits()),
14066                           SDLoc(ST), STType);
14067       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14068       return ReplaceLd(LD, Val, Chain);
14069     }
14070   }
14071
14072   // TODO: Deal with nonzero offset.
14073   if (LD->getBasePtr().isUndef() || Offset != 0)
14074     return SDValue();
14075   // Model necessary truncations / extenstions.
14076   SDValue Val;
14077   // Truncate Value To Stored Memory Size.
14078   do {
14079     if (!getTruncatedStoreValue(ST, Val))
14080       continue;
14081     if (!isTypeLegal(LDMemType))
14082       continue;
14083     if (STMemType != LDMemType) {
14084       // TODO: Support vectors? This requires extract_subvector/bitcast.
14085       if (!STMemType.isVector() && !LDMemType.isVector() &&
14086           STMemType.isInteger() && LDMemType.isInteger())
14087         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
14088       else
14089         continue;
14090     }
14091     if (!extendLoadedValueToExtension(LD, Val))
14092       continue;
14093     return ReplaceLd(LD, Val, Chain);
14094   } while (false);
14095
14096   // On failure, cleanup dead nodes we may have created.
14097   if (Val->use_empty())
14098     deleteAndRecombine(Val.getNode());
14099   return SDValue();
14100 }
14101
14102 SDValue DAGCombiner::visitLOAD(SDNode *N) {
14103   LoadSDNode *LD  = cast<LoadSDNode>(N);
14104   SDValue Chain = LD->getChain();
14105   SDValue Ptr   = LD->getBasePtr();
14106
14107   // If load is not volatile and there are no uses of the loaded value (and
14108   // the updated indexed value in case of indexed loads), change uses of the
14109   // chain value into uses of the chain input (i.e. delete the dead load).
14110   if (!LD->isVolatile()) {
14111     if (N->getValueType(1) == MVT::Other) {
14112       // Unindexed loads.
14113       if (!N->hasAnyUseOfValue(0)) {
14114         // It's not safe to use the two value CombineTo variant here. e.g.
14115         // v1, chain2 = load chain1, loc
14116         // v2, chain3 = load chain2, loc
14117         // v3         = add v2, c
14118         // Now we replace use of chain2 with chain1.  This makes the second load
14119         // isomorphic to the one we are deleting, and thus makes this load live.
14120         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14121                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14122                    dbgs() << "\n");
14123         WorklistRemover DeadNodes(*this);
14124         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14125         AddUsersToWorklist(Chain.getNode());
14126         if (N->use_empty())
14127           deleteAndRecombine(N);
14128
14129         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14130       }
14131     } else {
14132       // Indexed loads.
14133       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14134
14135       // If this load has an opaque TargetConstant offset, then we cannot split
14136       // the indexing into an add/sub directly (that TargetConstant may not be
14137       // valid for a different type of node, and we cannot convert an opaque
14138       // target constant into a regular constant).
14139       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
14140                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
14141
14142       if (!N->hasAnyUseOfValue(0) &&
14143           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
14144         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14145         SDValue Index;
14146         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
14147           Index = SplitIndexingFromLoad(LD);
14148           // Try to fold the base pointer arithmetic into subsequent loads and
14149           // stores.
14150           AddUsersToWorklist(N);
14151         } else
14152           Index = DAG.getUNDEF(N->getValueType(1));
14153         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14154                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14155                    dbgs() << " and 2 other values\n");
14156         WorklistRemover DeadNodes(*this);
14157         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14158         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14159         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14160         deleteAndRecombine(N);
14161         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14162       }
14163     }
14164   }
14165
14166   // If this load is directly stored, replace the load value with the stored
14167   // value.
14168   if (auto V = ForwardStoreValueToDirectLoad(LD))
14169     return V;
14170
14171   // Try to infer better alignment information than the load already has.
14172   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
14173     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14174       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
14175         SDValue NewLoad = DAG.getExtLoad(
14176             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14177             LD->getPointerInfo(), LD->getMemoryVT(), Align,
14178             LD->getMemOperand()->getFlags(), LD->getAAInfo());
14179         // NewLoad will always be N as we are only refining the alignment
14180         assert(NewLoad.getNode() == N);
14181         (void)NewLoad;
14182       }
14183     }
14184   }
14185
14186   if (LD->isUnindexed()) {
14187     // Walk up chain skipping non-aliasing memory nodes.
14188     SDValue BetterChain = FindBetterChain(LD, Chain);
14189
14190     // If there is a better chain.
14191     if (Chain != BetterChain) {
14192       SDValue ReplLoad;
14193
14194       // Replace the chain to void dependency.
14195       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14196         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14197                                BetterChain, Ptr, LD->getMemOperand());
14198       } else {
14199         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14200                                   LD->getValueType(0),
14201                                   BetterChain, Ptr, LD->getMemoryVT(),
14202                                   LD->getMemOperand());
14203       }
14204
14205       // Create token factor to keep old chain connected.
14206       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14207                                   MVT::Other, Chain, ReplLoad.getValue(1));
14208
14209       // Replace uses with load result and token factor
14210       return CombineTo(N, ReplLoad.getValue(0), Token);
14211     }
14212   }
14213
14214   // Try transforming N to an indexed load.
14215   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14216     return SDValue(N, 0);
14217
14218   // Try to slice up N to more direct loads if the slices are mapped to
14219   // different register banks or pairing can take place.
14220   if (SliceUpLoad(N))
14221     return SDValue(N, 0);
14222
14223   return SDValue();
14224 }
14225
14226 namespace {
14227
14228 /// Helper structure used to slice a load in smaller loads.
14229 /// Basically a slice is obtained from the following sequence:
14230 /// Origin = load Ty1, Base
14231 /// Shift = srl Ty1 Origin, CstTy Amount
14232 /// Inst = trunc Shift to Ty2
14233 ///
14234 /// Then, it will be rewritten into:
14235 /// Slice = load SliceTy, Base + SliceOffset
14236 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14237 ///
14238 /// SliceTy is deduced from the number of bits that are actually used to
14239 /// build Inst.
14240 struct LoadedSlice {
14241   /// Helper structure used to compute the cost of a slice.
14242   struct Cost {
14243     /// Are we optimizing for code size.
14244     bool ForCodeSize;
14245
14246     /// Various cost.
14247     unsigned Loads = 0;
14248     unsigned Truncates = 0;
14249     unsigned CrossRegisterBanksCopies = 0;
14250     unsigned ZExts = 0;
14251     unsigned Shift = 0;
14252
14253     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
14254
14255     /// Get the cost of one isolated slice.
14256     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
14257         : ForCodeSize(ForCodeSize), Loads(1) {
14258       EVT TruncType = LS.Inst->getValueType(0);
14259       EVT LoadedType = LS.getLoadedType();
14260       if (TruncType != LoadedType &&
14261           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14262         ZExts = 1;
14263     }
14264
14265     /// Account for slicing gain in the current cost.
14266     /// Slicing provide a few gains like removing a shift or a
14267     /// truncate. This method allows to grow the cost of the original
14268     /// load with the gain from this slice.
14269     void addSliceGain(const LoadedSlice &LS) {
14270       // Each slice saves a truncate.
14271       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14272       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14273                               LS.Inst->getValueType(0)))
14274         ++Truncates;
14275       // If there is a shift amount, this slice gets rid of it.
14276       if (LS.Shift)
14277         ++Shift;
14278       // If this slice can merge a cross register bank copy, account for it.
14279       if (LS.canMergeExpensiveCrossRegisterBankCopy())
14280         ++CrossRegisterBanksCopies;
14281     }
14282
14283     Cost &operator+=(const Cost &RHS) {
14284       Loads += RHS.Loads;
14285       Truncates += RHS.Truncates;
14286       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14287       ZExts += RHS.ZExts;
14288       Shift += RHS.Shift;
14289       return *this;
14290     }
14291
14292     bool operator==(const Cost &RHS) const {
14293       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14294              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14295              ZExts == RHS.ZExts && Shift == RHS.Shift;
14296     }
14297
14298     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14299
14300     bool operator<(const Cost &RHS) const {
14301       // Assume cross register banks copies are as expensive as loads.
14302       // FIXME: Do we want some more target hooks?
14303       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14304       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14305       // Unless we are optimizing for code size, consider the
14306       // expensive operation first.
14307       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14308         return ExpensiveOpsLHS < ExpensiveOpsRHS;
14309       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14310              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14311     }
14312
14313     bool operator>(const Cost &RHS) const { return RHS < *this; }
14314
14315     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14316
14317     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14318   };
14319
14320   // The last instruction that represent the slice. This should be a
14321   // truncate instruction.
14322   SDNode *Inst;
14323
14324   // The original load instruction.
14325   LoadSDNode *Origin;
14326
14327   // The right shift amount in bits from the original load.
14328   unsigned Shift;
14329
14330   // The DAG from which Origin came from.
14331   // This is used to get some contextual information about legal types, etc.
14332   SelectionDAG *DAG;
14333
14334   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
14335               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
14336       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14337
14338   /// Get the bits used in a chunk of bits \p BitWidth large.
14339   /// \return Result is \p BitWidth and has used bits set to 1 and
14340   ///         not used bits set to 0.
14341   APInt getUsedBits() const {
14342     // Reproduce the trunc(lshr) sequence:
14343     // - Start from the truncated value.
14344     // - Zero extend to the desired bit width.
14345     // - Shift left.
14346     assert(Origin && "No original load to compare against.");
14347     unsigned BitWidth = Origin->getValueSizeInBits(0);
14348     assert(Inst && "This slice is not bound to an instruction");
14349     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14350            "Extracted slice is bigger than the whole type!");
14351     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14352     UsedBits.setAllBits();
14353     UsedBits = UsedBits.zext(BitWidth);
14354     UsedBits <<= Shift;
14355     return UsedBits;
14356   }
14357
14358   /// Get the size of the slice to be loaded in bytes.
14359   unsigned getLoadedSize() const {
14360     unsigned SliceSize = getUsedBits().countPopulation();
14361     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14362     return SliceSize / 8;
14363   }
14364
14365   /// Get the type that will be loaded for this slice.
14366   /// Note: This may not be the final type for the slice.
14367   EVT getLoadedType() const {
14368     assert(DAG && "Missing context");
14369     LLVMContext &Ctxt = *DAG->getContext();
14370     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14371   }
14372
14373   /// Get the alignment of the load used for this slice.
14374   unsigned getAlignment() const {
14375     unsigned Alignment = Origin->getAlignment();
14376     uint64_t Offset = getOffsetFromBase();
14377     if (Offset != 0)
14378       Alignment = MinAlign(Alignment, Alignment + Offset);
14379     return Alignment;
14380   }
14381
14382   /// Check if this slice can be rewritten with legal operations.
14383   bool isLegal() const {
14384     // An invalid slice is not legal.
14385     if (!Origin || !Inst || !DAG)
14386       return false;
14387
14388     // Offsets are for indexed load only, we do not handle that.
14389     if (!Origin->getOffset().isUndef())
14390       return false;
14391
14392     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14393
14394     // Check that the type is legal.
14395     EVT SliceType = getLoadedType();
14396     if (!TLI.isTypeLegal(SliceType))
14397       return false;
14398
14399     // Check that the load is legal for this type.
14400     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
14401       return false;
14402
14403     // Check that the offset can be computed.
14404     // 1. Check its type.
14405     EVT PtrType = Origin->getBasePtr().getValueType();
14406     if (PtrType == MVT::Untyped || PtrType.isExtended())
14407       return false;
14408
14409     // 2. Check that it fits in the immediate.
14410     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
14411       return false;
14412
14413     // 3. Check that the computation is legal.
14414     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
14415       return false;
14416
14417     // Check that the zext is legal if it needs one.
14418     EVT TruncateType = Inst->getValueType(0);
14419     if (TruncateType != SliceType &&
14420         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
14421       return false;
14422
14423     return true;
14424   }
14425
14426   /// Get the offset in bytes of this slice in the original chunk of
14427   /// bits.
14428   /// \pre DAG != nullptr.
14429   uint64_t getOffsetFromBase() const {
14430     assert(DAG && "Missing context.");
14431     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
14432     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
14433     uint64_t Offset = Shift / 8;
14434     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
14435     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
14436            "The size of the original loaded type is not a multiple of a"
14437            " byte.");
14438     // If Offset is bigger than TySizeInBytes, it means we are loading all
14439     // zeros. This should have been optimized before in the process.
14440     assert(TySizeInBytes > Offset &&
14441            "Invalid shift amount for given loaded size");
14442     if (IsBigEndian)
14443       Offset = TySizeInBytes - Offset - getLoadedSize();
14444     return Offset;
14445   }
14446
14447   /// Generate the sequence of instructions to load the slice
14448   /// represented by this object and redirect the uses of this slice to
14449   /// this new sequence of instructions.
14450   /// \pre this->Inst && this->Origin are valid Instructions and this
14451   /// object passed the legal check: LoadedSlice::isLegal returned true.
14452   /// \return The last instruction of the sequence used to load the slice.
14453   SDValue loadSlice() const {
14454     assert(Inst && Origin && "Unable to replace a non-existing slice.");
14455     const SDValue &OldBaseAddr = Origin->getBasePtr();
14456     SDValue BaseAddr = OldBaseAddr;
14457     // Get the offset in that chunk of bytes w.r.t. the endianness.
14458     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
14459     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
14460     if (Offset) {
14461       // BaseAddr = BaseAddr + Offset.
14462       EVT ArithType = BaseAddr.getValueType();
14463       SDLoc DL(Origin);
14464       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
14465                               DAG->getConstant(Offset, DL, ArithType));
14466     }
14467
14468     // Create the type of the loaded slice according to its size.
14469     EVT SliceType = getLoadedType();
14470
14471     // Create the load for the slice.
14472     SDValue LastInst =
14473         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
14474                      Origin->getPointerInfo().getWithOffset(Offset),
14475                      getAlignment(), Origin->getMemOperand()->getFlags());
14476     // If the final type is not the same as the loaded type, this means that
14477     // we have to pad with zero. Create a zero extend for that.
14478     EVT FinalType = Inst->getValueType(0);
14479     if (SliceType != FinalType)
14480       LastInst =
14481           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
14482     return LastInst;
14483   }
14484
14485   /// Check if this slice can be merged with an expensive cross register
14486   /// bank copy. E.g.,
14487   /// i = load i32
14488   /// f = bitcast i32 i to float
14489   bool canMergeExpensiveCrossRegisterBankCopy() const {
14490     if (!Inst || !Inst->hasOneUse())
14491       return false;
14492     SDNode *Use = *Inst->use_begin();
14493     if (Use->getOpcode() != ISD::BITCAST)
14494       return false;
14495     assert(DAG && "Missing context");
14496     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14497     EVT ResVT = Use->getValueType(0);
14498     const TargetRegisterClass *ResRC =
14499         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14500     const TargetRegisterClass *ArgRC =
14501         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14502                            Use->getOperand(0)->isDivergent());
14503     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14504       return false;
14505
14506     // At this point, we know that we perform a cross-register-bank copy.
14507     // Check if it is expensive.
14508     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
14509     // Assume bitcasts are cheap, unless both register classes do not
14510     // explicitly share a common sub class.
14511     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14512       return false;
14513
14514     // Check if it will be merged with the load.
14515     // 1. Check the alignment constraint.
14516     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14517         ResVT.getTypeForEVT(*DAG->getContext()));
14518
14519     if (RequiredAlignment > getAlignment())
14520       return false;
14521
14522     // 2. Check that the load is a legal operation for that type.
14523     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14524       return false;
14525
14526     // 3. Check that we do not have a zext in the way.
14527     if (Inst->getValueType(0) != getLoadedType())
14528       return false;
14529
14530     return true;
14531   }
14532 };
14533
14534 } // end anonymous namespace
14535
14536 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
14537 /// \p UsedBits looks like 0..0 1..1 0..0.
14538 static bool areUsedBitsDense(const APInt &UsedBits) {
14539   // If all the bits are one, this is dense!
14540   if (UsedBits.isAllOnesValue())
14541     return true;
14542
14543   // Get rid of the unused bits on the right.
14544   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14545   // Get rid of the unused bits on the left.
14546   if (NarrowedUsedBits.countLeadingZeros())
14547     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14548   // Check that the chunk of bits is completely used.
14549   return NarrowedUsedBits.isAllOnesValue();
14550 }
14551
14552 /// Check whether or not \p First and \p Second are next to each other
14553 /// in memory. This means that there is no hole between the bits loaded
14554 /// by \p First and the bits loaded by \p Second.
14555 static bool areSlicesNextToEachOther(const LoadedSlice &First,
14556                                      const LoadedSlice &Second) {
14557   assert(First.Origin == Second.Origin && First.Origin &&
14558          "Unable to match different memory origins.");
14559   APInt UsedBits = First.getUsedBits();
14560   assert((UsedBits & Second.getUsedBits()) == 0 &&
14561          "Slices are not supposed to overlap.");
14562   UsedBits |= Second.getUsedBits();
14563   return areUsedBitsDense(UsedBits);
14564 }
14565
14566 /// Adjust the \p GlobalLSCost according to the target
14567 /// paring capabilities and the layout of the slices.
14568 /// \pre \p GlobalLSCost should account for at least as many loads as
14569 /// there is in the slices in \p LoadedSlices.
14570 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14571                                  LoadedSlice::Cost &GlobalLSCost) {
14572   unsigned NumberOfSlices = LoadedSlices.size();
14573   // If there is less than 2 elements, no pairing is possible.
14574   if (NumberOfSlices < 2)
14575     return;
14576
14577   // Sort the slices so that elements that are likely to be next to each
14578   // other in memory are next to each other in the list.
14579   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14580     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14581     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14582   });
14583   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14584   // First (resp. Second) is the first (resp. Second) potentially candidate
14585   // to be placed in a paired load.
14586   const LoadedSlice *First = nullptr;
14587   const LoadedSlice *Second = nullptr;
14588   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14589                 // Set the beginning of the pair.
14590                                                            First = Second) {
14591     Second = &LoadedSlices[CurrSlice];
14592
14593     // If First is NULL, it means we start a new pair.
14594     // Get to the next slice.
14595     if (!First)
14596       continue;
14597
14598     EVT LoadedType = First->getLoadedType();
14599
14600     // If the types of the slices are different, we cannot pair them.
14601     if (LoadedType != Second->getLoadedType())
14602       continue;
14603
14604     // Check if the target supplies paired loads for this type.
14605     unsigned RequiredAlignment = 0;
14606     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14607       // move to the next pair, this type is hopeless.
14608       Second = nullptr;
14609       continue;
14610     }
14611     // Check if we meet the alignment requirement.
14612     if (RequiredAlignment > First->getAlignment())
14613       continue;
14614
14615     // Check that both loads are next to each other in memory.
14616     if (!areSlicesNextToEachOther(*First, *Second))
14617       continue;
14618
14619     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14620     --GlobalLSCost.Loads;
14621     // Move to the next pair.
14622     Second = nullptr;
14623   }
14624 }
14625
14626 /// Check the profitability of all involved LoadedSlice.
14627 /// Currently, it is considered profitable if there is exactly two
14628 /// involved slices (1) which are (2) next to each other in memory, and
14629 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14630 ///
14631 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14632 /// the elements themselves.
14633 ///
14634 /// FIXME: When the cost model will be mature enough, we can relax
14635 /// constraints (1) and (2).
14636 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14637                                 const APInt &UsedBits, bool ForCodeSize) {
14638   unsigned NumberOfSlices = LoadedSlices.size();
14639   if (StressLoadSlicing)
14640     return NumberOfSlices > 1;
14641
14642   // Check (1).
14643   if (NumberOfSlices != 2)
14644     return false;
14645
14646   // Check (2).
14647   if (!areUsedBitsDense(UsedBits))
14648     return false;
14649
14650   // Check (3).
14651   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14652   // The original code has one big load.
14653   OrigCost.Loads = 1;
14654   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14655     const LoadedSlice &LS = LoadedSlices[CurrSlice];
14656     // Accumulate the cost of all the slices.
14657     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14658     GlobalSlicingCost += SliceCost;
14659
14660     // Account as cost in the original configuration the gain obtained
14661     // with the current slices.
14662     OrigCost.addSliceGain(LS);
14663   }
14664
14665   // If the target supports paired load, adjust the cost accordingly.
14666   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14667   return OrigCost > GlobalSlicingCost;
14668 }
14669
14670 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14671 /// operations, split it in the various pieces being extracted.
14672 ///
14673 /// This sort of thing is introduced by SROA.
14674 /// This slicing takes care not to insert overlapping loads.
14675 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
14676 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14677   if (Level < AfterLegalizeDAG)
14678     return false;
14679
14680   LoadSDNode *LD = cast<LoadSDNode>(N);
14681   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
14682       !LD->getValueType(0).isInteger())
14683     return false;
14684
14685   // Keep track of already used bits to detect overlapping values.
14686   // In that case, we will just abort the transformation.
14687   APInt UsedBits(LD->getValueSizeInBits(0), 0);
14688
14689   SmallVector<LoadedSlice, 4> LoadedSlices;
14690
14691   // Check if this load is used as several smaller chunks of bits.
14692   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14693   // of computation for each trunc.
14694   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14695        UI != UIEnd; ++UI) {
14696     // Skip the uses of the chain.
14697     if (UI.getUse().getResNo() != 0)
14698       continue;
14699
14700     SDNode *User = *UI;
14701     unsigned Shift = 0;
14702
14703     // Check if this is a trunc(lshr).
14704     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14705         isa<ConstantSDNode>(User->getOperand(1))) {
14706       Shift = User->getConstantOperandVal(1);
14707       User = *User->use_begin();
14708     }
14709
14710     // At this point, User is a Truncate, iff we encountered, trunc or
14711     // trunc(lshr).
14712     if (User->getOpcode() != ISD::TRUNCATE)
14713       return false;
14714
14715     // The width of the type must be a power of 2 and greater than 8-bits.
14716     // Otherwise the load cannot be represented in LLVM IR.
14717     // Moreover, if we shifted with a non-8-bits multiple, the slice
14718     // will be across several bytes. We do not support that.
14719     unsigned Width = User->getValueSizeInBits(0);
14720     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14721       return false;
14722
14723     // Build the slice for this chain of computations.
14724     LoadedSlice LS(User, LD, Shift, &DAG);
14725     APInt CurrentUsedBits = LS.getUsedBits();
14726
14727     // Check if this slice overlaps with another.
14728     if ((CurrentUsedBits & UsedBits) != 0)
14729       return false;
14730     // Update the bits used globally.
14731     UsedBits |= CurrentUsedBits;
14732
14733     // Check if the new slice would be legal.
14734     if (!LS.isLegal())
14735       return false;
14736
14737     // Record the slice.
14738     LoadedSlices.push_back(LS);
14739   }
14740
14741   // Abort slicing if it does not seem to be profitable.
14742   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14743     return false;
14744
14745   ++SlicedLoads;
14746
14747   // Rewrite each chain to use an independent load.
14748   // By construction, each chain can be represented by a unique load.
14749
14750   // Prepare the argument for the new token factor for all the slices.
14751   SmallVector<SDValue, 8> ArgChains;
14752   for (SmallVectorImpl<LoadedSlice>::const_iterator
14753            LSIt = LoadedSlices.begin(),
14754            LSItEnd = LoadedSlices.end();
14755        LSIt != LSItEnd; ++LSIt) {
14756     SDValue SliceInst = LSIt->loadSlice();
14757     CombineTo(LSIt->Inst, SliceInst, true);
14758     if (SliceInst.getOpcode() != ISD::LOAD)
14759       SliceInst = SliceInst.getOperand(0);
14760     assert(SliceInst->getOpcode() == ISD::LOAD &&
14761            "It takes more than a zext to get to the loaded slice!!");
14762     ArgChains.push_back(SliceInst.getValue(1));
14763   }
14764
14765   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14766                               ArgChains);
14767   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14768   AddToWorklist(Chain.getNode());
14769   return true;
14770 }
14771
14772 /// Check to see if V is (and load (ptr), imm), where the load is having
14773 /// specific bytes cleared out.  If so, return the byte size being masked out
14774 /// and the shift amount.
14775 static std::pair<unsigned, unsigned>
14776 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14777   std::pair<unsigned, unsigned> Result(0, 0);
14778
14779   // Check for the structure we're looking for.
14780   if (V->getOpcode() != ISD::AND ||
14781       !isa<ConstantSDNode>(V->getOperand(1)) ||
14782       !ISD::isNormalLoad(V->getOperand(0).getNode()))
14783     return Result;
14784
14785   // Check the chain and pointer.
14786   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14787   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
14788
14789   // This only handles simple types.
14790   if (V.getValueType() != MVT::i16 &&
14791       V.getValueType() != MVT::i32 &&
14792       V.getValueType() != MVT::i64)
14793     return Result;
14794
14795   // Check the constant mask.  Invert it so that the bits being masked out are
14796   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
14797   // follow the sign bit for uniformity.
14798   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14799   unsigned NotMaskLZ = countLeadingZeros(NotMask);
14800   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
14801   unsigned NotMaskTZ = countTrailingZeros(NotMask);
14802   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
14803   if (NotMaskLZ == 64) return Result;  // All zero mask.
14804
14805   // See if we have a continuous run of bits.  If so, we have 0*1+0*
14806   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14807     return Result;
14808
14809   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14810   if (V.getValueType() != MVT::i64 && NotMaskLZ)
14811     NotMaskLZ -= 64-V.getValueSizeInBits();
14812
14813   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14814   switch (MaskedBytes) {
14815   case 1:
14816   case 2:
14817   case 4: break;
14818   default: return Result; // All one mask, or 5-byte mask.
14819   }
14820
14821   // Verify that the first bit starts at a multiple of mask so that the access
14822   // is aligned the same as the access width.
14823   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14824
14825   // For narrowing to be valid, it must be the case that the load the
14826   // immediately preceding memory operation before the store.
14827   if (LD == Chain.getNode())
14828     ; // ok.
14829   else if (Chain->getOpcode() == ISD::TokenFactor &&
14830            SDValue(LD, 1).hasOneUse()) {
14831     // LD has only 1 chain use so they are no indirect dependencies.
14832     bool isOk = false;
14833     for (const SDValue &ChainOp : Chain->op_values())
14834       if (ChainOp.getNode() == LD) {
14835         isOk = true;
14836         break;
14837       }
14838     if (!isOk)
14839       return Result;
14840   } else
14841     return Result; // Fail.
14842
14843   Result.first = MaskedBytes;
14844   Result.second = NotMaskTZ/8;
14845   return Result;
14846 }
14847
14848 /// Check to see if IVal is something that provides a value as specified by
14849 /// MaskInfo. If so, replace the specified store with a narrower store of
14850 /// truncated IVal.
14851 static SDNode *
14852 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14853                                 SDValue IVal, StoreSDNode *St,
14854                                 DAGCombiner *DC) {
14855   unsigned NumBytes = MaskInfo.first;
14856   unsigned ByteShift = MaskInfo.second;
14857   SelectionDAG &DAG = DC->getDAG();
14858
14859   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14860   // that uses this.  If not, this is not a replacement.
14861   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14862                                   ByteShift*8, (ByteShift+NumBytes)*8);
14863   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14864
14865   // Check that it is legal on the target to do this.  It is legal if the new
14866   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14867   // legalization.
14868   MVT VT = MVT::getIntegerVT(NumBytes*8);
14869   if (!DC->isTypeLegal(VT))
14870     return nullptr;
14871
14872   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14873   // shifted by ByteShift and truncated down to NumBytes.
14874   if (ByteShift) {
14875     SDLoc DL(IVal);
14876     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14877                        DAG.getConstant(ByteShift*8, DL,
14878                                     DC->getShiftAmountTy(IVal.getValueType())));
14879   }
14880
14881   // Figure out the offset for the store and the alignment of the access.
14882   unsigned StOffset;
14883   unsigned NewAlign = St->getAlignment();
14884
14885   if (DAG.getDataLayout().isLittleEndian())
14886     StOffset = ByteShift;
14887   else
14888     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14889
14890   SDValue Ptr = St->getBasePtr();
14891   if (StOffset) {
14892     SDLoc DL(IVal);
14893     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14894                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14895     NewAlign = MinAlign(NewAlign, StOffset);
14896   }
14897
14898   // Truncate down to the new size.
14899   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14900
14901   ++OpsNarrowed;
14902   return DAG
14903       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14904                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14905       .getNode();
14906 }
14907
14908 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14909 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14910 /// narrowing the load and store if it would end up being a win for performance
14911 /// or code size.
14912 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14913   StoreSDNode *ST  = cast<StoreSDNode>(N);
14914   if (ST->isVolatile())
14915     return SDValue();
14916
14917   SDValue Chain = ST->getChain();
14918   SDValue Value = ST->getValue();
14919   SDValue Ptr   = ST->getBasePtr();
14920   EVT VT = Value.getValueType();
14921
14922   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14923     return SDValue();
14924
14925   unsigned Opc = Value.getOpcode();
14926
14927   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14928   // is a byte mask indicating a consecutive number of bytes, check to see if
14929   // Y is known to provide just those bytes.  If so, we try to replace the
14930   // load + replace + store sequence with a single (narrower) store, which makes
14931   // the load dead.
14932   if (Opc == ISD::OR) {
14933     std::pair<unsigned, unsigned> MaskedLoad;
14934     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14935     if (MaskedLoad.first)
14936       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14937                                                   Value.getOperand(1), ST,this))
14938         return SDValue(NewST, 0);
14939
14940     // Or is commutative, so try swapping X and Y.
14941     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14942     if (MaskedLoad.first)
14943       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14944                                                   Value.getOperand(0), ST,this))
14945         return SDValue(NewST, 0);
14946   }
14947
14948   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14949       Value.getOperand(1).getOpcode() != ISD::Constant)
14950     return SDValue();
14951
14952   SDValue N0 = Value.getOperand(0);
14953   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14954       Chain == SDValue(N0.getNode(), 1)) {
14955     LoadSDNode *LD = cast<LoadSDNode>(N0);
14956     if (LD->getBasePtr() != Ptr ||
14957         LD->getPointerInfo().getAddrSpace() !=
14958         ST->getPointerInfo().getAddrSpace())
14959       return SDValue();
14960
14961     // Find the type to narrow it the load / op / store to.
14962     SDValue N1 = Value.getOperand(1);
14963     unsigned BitWidth = N1.getValueSizeInBits();
14964     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14965     if (Opc == ISD::AND)
14966       Imm ^= APInt::getAllOnesValue(BitWidth);
14967     if (Imm == 0 || Imm.isAllOnesValue())
14968       return SDValue();
14969     unsigned ShAmt = Imm.countTrailingZeros();
14970     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14971     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14972     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14973     // The narrowing should be profitable, the load/store operation should be
14974     // legal (or custom) and the store size should be equal to the NewVT width.
14975     while (NewBW < BitWidth &&
14976            (NewVT.getStoreSizeInBits() != NewBW ||
14977             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14978             !TLI.isNarrowingProfitable(VT, NewVT))) {
14979       NewBW = NextPowerOf2(NewBW);
14980       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14981     }
14982     if (NewBW >= BitWidth)
14983       return SDValue();
14984
14985     // If the lsb changed does not start at the type bitwidth boundary,
14986     // start at the previous one.
14987     if (ShAmt % NewBW)
14988       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14989     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14990                                    std::min(BitWidth, ShAmt + NewBW));
14991     if ((Imm & Mask) == Imm) {
14992       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14993       if (Opc == ISD::AND)
14994         NewImm ^= APInt::getAllOnesValue(NewBW);
14995       uint64_t PtrOff = ShAmt / 8;
14996       // For big endian targets, we need to adjust the offset to the pointer to
14997       // load the correct bytes.
14998       if (DAG.getDataLayout().isBigEndian())
14999         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15000
15001       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
15002       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15003       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
15004         return SDValue();
15005
15006       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
15007                                    Ptr.getValueType(), Ptr,
15008                                    DAG.getConstant(PtrOff, SDLoc(LD),
15009                                                    Ptr.getValueType()));
15010       SDValue NewLD =
15011           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15012                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15013                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
15014       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15015                                    DAG.getConstant(NewImm, SDLoc(Value),
15016                                                    NewVT));
15017       SDValue NewST =
15018           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15019                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15020
15021       AddToWorklist(NewPtr.getNode());
15022       AddToWorklist(NewLD.getNode());
15023       AddToWorklist(NewVal.getNode());
15024       WorklistRemover DeadNodes(*this);
15025       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15026       ++OpsNarrowed;
15027       return NewST;
15028     }
15029   }
15030
15031   return SDValue();
15032 }
15033
15034 /// For a given floating point load / store pair, if the load value isn't used
15035 /// by any other operations, then consider transforming the pair to integer
15036 /// load / store operations if the target deems the transformation profitable.
15037 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15038   StoreSDNode *ST  = cast<StoreSDNode>(N);
15039   SDValue Value = ST->getValue();
15040   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15041       Value.hasOneUse()) {
15042     LoadSDNode *LD = cast<LoadSDNode>(Value);
15043     EVT VT = LD->getMemoryVT();
15044     if (!VT.isFloatingPoint() ||
15045         VT != ST->getMemoryVT() ||
15046         LD->isNonTemporal() ||
15047         ST->isNonTemporal() ||
15048         LD->getPointerInfo().getAddrSpace() != 0 ||
15049         ST->getPointerInfo().getAddrSpace() != 0)
15050       return SDValue();
15051
15052     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
15053     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15054         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15055         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15056         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15057       return SDValue();
15058
15059     unsigned LDAlign = LD->getAlignment();
15060     unsigned STAlign = ST->getAlignment();
15061     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15062     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
15063     if (LDAlign < ABIAlign || STAlign < ABIAlign)
15064       return SDValue();
15065
15066     SDValue NewLD =
15067         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15068                     LD->getPointerInfo(), LDAlign);
15069
15070     SDValue NewST =
15071         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15072                      ST->getPointerInfo(), STAlign);
15073
15074     AddToWorklist(NewLD.getNode());
15075     AddToWorklist(NewST.getNode());
15076     WorklistRemover DeadNodes(*this);
15077     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15078     ++LdStFP2Int;
15079     return NewST;
15080   }
15081
15082   return SDValue();
15083 }
15084
15085 // This is a helper function for visitMUL to check the profitability
15086 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15087 // MulNode is the original multiply, AddNode is (add x, c1),
15088 // and ConstNode is c2.
15089 //
15090 // If the (add x, c1) has multiple uses, we could increase
15091 // the number of adds if we make this transformation.
15092 // It would only be worth doing this if we can remove a
15093 // multiply in the process. Check for that here.
15094 // To illustrate:
15095 //     (A + c1) * c3
15096 //     (A + c2) * c3
15097 // We're checking for cases where we have common "c3 * A" expressions.
15098 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15099                                               SDValue &AddNode,
15100                                               SDValue &ConstNode) {
15101   APInt Val;
15102
15103   // If the add only has one use, this would be OK to do.
15104   if (AddNode.getNode()->hasOneUse())
15105     return true;
15106
15107   // Walk all the users of the constant with which we're multiplying.
15108   for (SDNode *Use : ConstNode->uses()) {
15109     if (Use == MulNode) // This use is the one we're on right now. Skip it.
15110       continue;
15111
15112     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15113       SDNode *OtherOp;
15114       SDNode *MulVar = AddNode.getOperand(0).getNode();
15115
15116       // OtherOp is what we're multiplying against the constant.
15117       if (Use->getOperand(0) == ConstNode)
15118         OtherOp = Use->getOperand(1).getNode();
15119       else
15120         OtherOp = Use->getOperand(0).getNode();
15121
15122       // Check to see if multiply is with the same operand of our "add".
15123       //
15124       //     ConstNode  = CONST
15125       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15126       //     ...
15127       //     AddNode  = (A + c1)  <-- MulVar is A.
15128       //         = AddNode * ConstNode   <-- current visiting instruction.
15129       //
15130       // If we make this transformation, we will have a common
15131       // multiply (ConstNode * A) that we can save.
15132       if (OtherOp == MulVar)
15133         return true;
15134
15135       // Now check to see if a future expansion will give us a common
15136       // multiply.
15137       //
15138       //     ConstNode  = CONST
15139       //     AddNode    = (A + c1)
15140       //     ...   = AddNode * ConstNode <-- current visiting instruction.
15141       //     ...
15142       //     OtherOp = (A + c2)
15143       //     Use     = OtherOp * ConstNode <-- visiting Use.
15144       //
15145       // If we make this transformation, we will have a common
15146       // multiply (CONST * A) after we also do the same transformation
15147       // to the "t2" instruction.
15148       if (OtherOp->getOpcode() == ISD::ADD &&
15149           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15150           OtherOp->getOperand(0).getNode() == MulVar)
15151         return true;
15152     }
15153   }
15154
15155   // Didn't find a case where this would be profitable.
15156   return false;
15157 }
15158
15159 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15160                                          unsigned NumStores) {
15161   SmallVector<SDValue, 8> Chains;
15162   SmallPtrSet<const SDNode *, 8> Visited;
15163   SDLoc StoreDL(StoreNodes[0].MemNode);
15164
15165   for (unsigned i = 0; i < NumStores; ++i) {
15166     Visited.insert(StoreNodes[i].MemNode);
15167   }
15168
15169   // don't include nodes that are children or repeated nodes.
15170   for (unsigned i = 0; i < NumStores; ++i) {
15171     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15172       Chains.push_back(StoreNodes[i].MemNode->getChain());
15173   }
15174
15175   assert(Chains.size() > 0 && "Chain should have generated a chain");
15176   return DAG.getTokenFactor(StoreDL, Chains);
15177 }
15178
15179 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
15180     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15181     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15182   // Make sure we have something to merge.
15183   if (NumStores < 2)
15184     return false;
15185
15186   // The latest Node in the DAG.
15187   SDLoc DL(StoreNodes[0].MemNode);
15188
15189   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
15190   unsigned SizeInBits = NumStores * ElementSizeBits;
15191   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15192
15193   EVT StoreTy;
15194   if (UseVector) {
15195     unsigned Elts = NumStores * NumMemElts;
15196     // Get the type for the merged vector store.
15197     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15198   } else
15199     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15200
15201   SDValue StoredVal;
15202   if (UseVector) {
15203     if (IsConstantSrc) {
15204       SmallVector<SDValue, 8> BuildVector;
15205       for (unsigned I = 0; I != NumStores; ++I) {
15206         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15207         SDValue Val = St->getValue();
15208         // If constant is of the wrong type, convert it now.
15209         if (MemVT != Val.getValueType()) {
15210           Val = peekThroughBitcasts(Val);
15211           // Deal with constants of wrong size.
15212           if (ElementSizeBits != Val.getValueSizeInBits()) {
15213             EVT IntMemVT =
15214                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15215             if (isa<ConstantFPSDNode>(Val)) {
15216               // Not clear how to truncate FP values.
15217               return false;
15218             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15219               Val = DAG.getConstant(C->getAPIntValue()
15220                                         .zextOrTrunc(Val.getValueSizeInBits())
15221                                         .zextOrTrunc(ElementSizeBits),
15222                                     SDLoc(C), IntMemVT);
15223           }
15224           // Make sure correctly size type is the correct type.
15225           Val = DAG.getBitcast(MemVT, Val);
15226         }
15227         BuildVector.push_back(Val);
15228       }
15229       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15230                                                : ISD::BUILD_VECTOR,
15231                               DL, StoreTy, BuildVector);
15232     } else {
15233       SmallVector<SDValue, 8> Ops;
15234       for (unsigned i = 0; i < NumStores; ++i) {
15235         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15236         SDValue Val = peekThroughBitcasts(St->getValue());
15237         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15238         // type MemVT. If the underlying value is not the correct
15239         // type, but it is an extraction of an appropriate vector we
15240         // can recast Val to be of the correct type. This may require
15241         // converting between EXTRACT_VECTOR_ELT and
15242         // EXTRACT_SUBVECTOR.
15243         if ((MemVT != Val.getValueType()) &&
15244             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15245              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15246           EVT MemVTScalarTy = MemVT.getScalarType();
15247           // We may need to add a bitcast here to get types to line up.
15248           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15249             Val = DAG.getBitcast(MemVT, Val);
15250           } else {
15251             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15252                                             : ISD::EXTRACT_VECTOR_ELT;
15253             SDValue Vec = Val.getOperand(0);
15254             SDValue Idx = Val.getOperand(1);
15255             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15256           }
15257         }
15258         Ops.push_back(Val);
15259       }
15260
15261       // Build the extracted vector elements back into a vector.
15262       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15263                                                : ISD::BUILD_VECTOR,
15264                               DL, StoreTy, Ops);
15265     }
15266   } else {
15267     // We should always use a vector store when merging extracted vector
15268     // elements, so this path implies a store of constants.
15269     assert(IsConstantSrc && "Merged vector elements should use vector store");
15270
15271     APInt StoreInt(SizeInBits, 0);
15272
15273     // Construct a single integer constant which is made of the smaller
15274     // constant inputs.
15275     bool IsLE = DAG.getDataLayout().isLittleEndian();
15276     for (unsigned i = 0; i < NumStores; ++i) {
15277       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
15278       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
15279
15280       SDValue Val = St->getValue();
15281       Val = peekThroughBitcasts(Val);
15282       StoreInt <<= ElementSizeBits;
15283       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
15284         StoreInt |= C->getAPIntValue()
15285                         .zextOrTrunc(ElementSizeBits)
15286                         .zextOrTrunc(SizeInBits);
15287       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
15288         StoreInt |= C->getValueAPF()
15289                         .bitcastToAPInt()
15290                         .zextOrTrunc(ElementSizeBits)
15291                         .zextOrTrunc(SizeInBits);
15292         // If fp truncation is necessary give up for now.
15293         if (MemVT.getSizeInBits() != ElementSizeBits)
15294           return false;
15295       } else {
15296         llvm_unreachable("Invalid constant element type");
15297       }
15298     }
15299
15300     // Create the new Load and Store operations.
15301     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
15302   }
15303
15304   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15305   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
15306
15307   // make sure we use trunc store if it's necessary to be legal.
15308   SDValue NewStore;
15309   if (!UseTrunc) {
15310     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
15311                             FirstInChain->getPointerInfo(),
15312                             FirstInChain->getAlignment());
15313   } else { // Must be realized as a trunc store
15314     EVT LegalizedStoredValTy =
15315         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
15316     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
15317     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
15318     SDValue ExtendedStoreVal =
15319         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
15320                         LegalizedStoredValTy);
15321     NewStore = DAG.getTruncStore(
15322         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
15323         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
15324         FirstInChain->getAlignment(),
15325         FirstInChain->getMemOperand()->getFlags());
15326   }
15327
15328   // Replace all merged stores with the new store.
15329   for (unsigned i = 0; i < NumStores; ++i)
15330     CombineTo(StoreNodes[i].MemNode, NewStore);
15331
15332   AddToWorklist(NewChain.getNode());
15333   return true;
15334 }
15335
15336 void DAGCombiner::getStoreMergeCandidates(
15337     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
15338     SDNode *&RootNode) {
15339   // This holds the base pointer, index, and the offset in bytes from the base
15340   // pointer.
15341   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
15342   EVT MemVT = St->getMemoryVT();
15343
15344   SDValue Val = peekThroughBitcasts(St->getValue());
15345   // We must have a base and an offset.
15346   if (!BasePtr.getBase().getNode())
15347     return;
15348
15349   // Do not handle stores to undef base pointers.
15350   if (BasePtr.getBase().isUndef())
15351     return;
15352
15353   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
15354   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15355                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15356   bool IsLoadSrc = isa<LoadSDNode>(Val);
15357   BaseIndexOffset LBasePtr;
15358   // Match on loadbaseptr if relevant.
15359   EVT LoadVT;
15360   if (IsLoadSrc) {
15361     auto *Ld = cast<LoadSDNode>(Val);
15362     LBasePtr = BaseIndexOffset::match(Ld, DAG);
15363     LoadVT = Ld->getMemoryVT();
15364     // Load and store should be the same type.
15365     if (MemVT != LoadVT)
15366       return;
15367     // Loads must only have one use.
15368     if (!Ld->hasNUsesOfValue(1, 0))
15369       return;
15370     // The memory operands must not be volatile/indexed.
15371     if (Ld->isVolatile() || Ld->isIndexed())
15372       return;
15373   }
15374   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
15375                             int64_t &Offset) -> bool {
15376     // The memory operands must not be volatile/indexed.
15377     if (Other->isVolatile() || Other->isIndexed())
15378       return false;
15379     // Don't mix temporal stores with non-temporal stores.
15380     if (St->isNonTemporal() != Other->isNonTemporal())
15381       return false;
15382     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
15383     // Allow merging constants of different types as integers.
15384     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
15385                                            : Other->getMemoryVT() != MemVT;
15386     if (IsLoadSrc) {
15387       if (NoTypeMatch)
15388         return false;
15389       // The Load's Base Ptr must also match
15390       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
15391         BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
15392         if (LoadVT != OtherLd->getMemoryVT())
15393           return false;
15394         // Loads must only have one use.
15395         if (!OtherLd->hasNUsesOfValue(1, 0))
15396           return false;
15397         // The memory operands must not be volatile/indexed.
15398         if (OtherLd->isVolatile() || OtherLd->isIndexed())
15399           return false;
15400         // Don't mix temporal loads with non-temporal loads.
15401         if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
15402           return false;
15403         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
15404           return false;
15405       } else
15406         return false;
15407     }
15408     if (IsConstantSrc) {
15409       if (NoTypeMatch)
15410         return false;
15411       if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
15412         return false;
15413     }
15414     if (IsExtractVecSrc) {
15415       // Do not merge truncated stores here.
15416       if (Other->isTruncatingStore())
15417         return false;
15418       if (!MemVT.bitsEq(OtherBC.getValueType()))
15419         return false;
15420       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
15421           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15422         return false;
15423     }
15424     Ptr = BaseIndexOffset::match(Other, DAG);
15425     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
15426   };
15427
15428   // We looking for a root node which is an ancestor to all mergable
15429   // stores. We search up through a load, to our root and then down
15430   // through all children. For instance we will find Store{1,2,3} if
15431   // St is Store1, Store2. or Store3 where the root is not a load
15432   // which always true for nonvolatile ops. TODO: Expand
15433   // the search to find all valid candidates through multiple layers of loads.
15434   //
15435   // Root
15436   // |-------|-------|
15437   // Load    Load    Store3
15438   // |       |
15439   // Store1   Store2
15440   //
15441   // FIXME: We should be able to climb and
15442   // descend TokenFactors to find candidates as well.
15443
15444   RootNode = St->getChain().getNode();
15445
15446   unsigned NumNodesExplored = 0;
15447   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
15448     RootNode = Ldn->getChain().getNode();
15449     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15450          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15451       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
15452         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
15453           if (I2.getOperandNo() == 0)
15454             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
15455               BaseIndexOffset Ptr;
15456               int64_t PtrDiff;
15457               if (CandidateMatch(OtherST, Ptr, PtrDiff))
15458                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15459             }
15460   } else
15461     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15462          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15463       if (I.getOperandNo() == 0)
15464         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
15465           BaseIndexOffset Ptr;
15466           int64_t PtrDiff;
15467           if (CandidateMatch(OtherST, Ptr, PtrDiff))
15468             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15469         }
15470 }
15471
15472 // We need to check that merging these stores does not cause a loop in
15473 // the DAG. Any store candidate may depend on another candidate
15474 // indirectly through its operand (we already consider dependencies
15475 // through the chain). Check in parallel by searching up from
15476 // non-chain operands of candidates.
15477 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
15478     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
15479     SDNode *RootNode) {
15480   // FIXME: We should be able to truncate a full search of
15481   // predecessors by doing a BFS and keeping tabs the originating
15482   // stores from which worklist nodes come from in a similar way to
15483   // TokenFactor simplfication.
15484
15485   SmallPtrSet<const SDNode *, 32> Visited;
15486   SmallVector<const SDNode *, 8> Worklist;
15487
15488   // RootNode is a predecessor to all candidates so we need not search
15489   // past it. Add RootNode (peeking through TokenFactors). Do not count
15490   // these towards size check.
15491
15492   Worklist.push_back(RootNode);
15493   while (!Worklist.empty()) {
15494     auto N = Worklist.pop_back_val();
15495     if (!Visited.insert(N).second)
15496       continue; // Already present in Visited.
15497     if (N->getOpcode() == ISD::TokenFactor) {
15498       for (SDValue Op : N->ops())
15499         Worklist.push_back(Op.getNode());
15500     }
15501   }
15502
15503   // Don't count pruning nodes towards max.
15504   unsigned int Max = 1024 + Visited.size();
15505   // Search Ops of store candidates.
15506   for (unsigned i = 0; i < NumStores; ++i) {
15507     SDNode *N = StoreNodes[i].MemNode;
15508     // Of the 4 Store Operands:
15509     //   * Chain (Op 0) -> We have already considered these
15510     //                    in candidate selection and can be
15511     //                    safely ignored
15512     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
15513     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
15514     //                       but aren't necessarily fromt the same base node, so
15515     //                       cycles possible (e.g. via indexed store).
15516     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
15517     //               non-indexed stores). Not constant on all targets (e.g. ARM)
15518     //               and so can participate in a cycle.
15519     for (unsigned j = 1; j < N->getNumOperands(); ++j)
15520       Worklist.push_back(N->getOperand(j).getNode());
15521   }
15522   // Search through DAG. We can stop early if we find a store node.
15523   for (unsigned i = 0; i < NumStores; ++i)
15524     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
15525                                      Max))
15526       return false;
15527   return true;
15528 }
15529
15530 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15531   if (OptLevel == CodeGenOpt::None)
15532     return false;
15533
15534   EVT MemVT = St->getMemoryVT();
15535   int64_t ElementSizeBytes = MemVT.getStoreSize();
15536   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15537
15538   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15539     return false;
15540
15541   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15542       Attribute::NoImplicitFloat);
15543
15544   // This function cannot currently deal with non-byte-sized memory sizes.
15545   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15546     return false;
15547
15548   if (!MemVT.isSimple())
15549     return false;
15550
15551   // Perform an early exit check. Do not bother looking at stored values that
15552   // are not constants, loads, or extracted vector elements.
15553   SDValue StoredVal = peekThroughBitcasts(St->getValue());
15554   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15555   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15556                        isa<ConstantFPSDNode>(StoredVal);
15557   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15558                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15559   bool IsNonTemporalStore = St->isNonTemporal();
15560   bool IsNonTemporalLoad =
15561       IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15562
15563   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15564     return false;
15565
15566   SmallVector<MemOpLink, 8> StoreNodes;
15567   SDNode *RootNode;
15568   // Find potential store merge candidates by searching through chain sub-DAG
15569   getStoreMergeCandidates(St, StoreNodes, RootNode);
15570
15571   // Check if there is anything to merge.
15572   if (StoreNodes.size() < 2)
15573     return false;
15574
15575   // Sort the memory operands according to their distance from the
15576   // base pointer.
15577   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15578     return LHS.OffsetFromBase < RHS.OffsetFromBase;
15579   });
15580
15581   // Store Merge attempts to merge the lowest stores. This generally
15582   // works out as if successful, as the remaining stores are checked
15583   // after the first collection of stores is merged. However, in the
15584   // case that a non-mergeable store is found first, e.g., {p[-2],
15585   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15586   // mergeable cases. To prevent this, we prune such stores from the
15587   // front of StoreNodes here.
15588
15589   bool RV = false;
15590   while (StoreNodes.size() > 1) {
15591     unsigned StartIdx = 0;
15592     while ((StartIdx + 1 < StoreNodes.size()) &&
15593            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15594                StoreNodes[StartIdx + 1].OffsetFromBase)
15595       ++StartIdx;
15596
15597     // Bail if we don't have enough candidates to merge.
15598     if (StartIdx + 1 >= StoreNodes.size())
15599       return RV;
15600
15601     if (StartIdx)
15602       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15603
15604     // Scan the memory operations on the chain and find the first
15605     // non-consecutive store memory address.
15606     unsigned NumConsecutiveStores = 1;
15607     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15608     // Check that the addresses are consecutive starting from the second
15609     // element in the list of stores.
15610     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15611       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15612       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15613         break;
15614       NumConsecutiveStores = i + 1;
15615     }
15616
15617     if (NumConsecutiveStores < 2) {
15618       StoreNodes.erase(StoreNodes.begin(),
15619                        StoreNodes.begin() + NumConsecutiveStores);
15620       continue;
15621     }
15622
15623     // The node with the lowest store address.
15624     LLVMContext &Context = *DAG.getContext();
15625     const DataLayout &DL = DAG.getDataLayout();
15626
15627     // Store the constants into memory as one consecutive store.
15628     if (IsConstantSrc) {
15629       while (NumConsecutiveStores >= 2) {
15630         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15631         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15632         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15633         unsigned LastLegalType = 1;
15634         unsigned LastLegalVectorType = 1;
15635         bool LastIntegerTrunc = false;
15636         bool NonZero = false;
15637         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15638         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15639           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15640           SDValue StoredVal = ST->getValue();
15641           bool IsElementZero = false;
15642           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15643             IsElementZero = C->isNullValue();
15644           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15645             IsElementZero = C->getConstantFPValue()->isNullValue();
15646           if (IsElementZero) {
15647             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15648               FirstZeroAfterNonZero = i;
15649           }
15650           NonZero |= !IsElementZero;
15651
15652           // Find a legal type for the constant store.
15653           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15654           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15655           bool IsFast = false;
15656
15657           // Break early when size is too large to be legal.
15658           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15659             break;
15660
15661           if (TLI.isTypeLegal(StoreTy) &&
15662               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15663               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15664                                      *FirstInChain->getMemOperand(), &IsFast) &&
15665               IsFast) {
15666             LastIntegerTrunc = false;
15667             LastLegalType = i + 1;
15668             // Or check whether a truncstore is legal.
15669           } else if (TLI.getTypeAction(Context, StoreTy) ==
15670                      TargetLowering::TypePromoteInteger) {
15671             EVT LegalizedStoredValTy =
15672                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15673             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15674                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15675                 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15676                                        *FirstInChain->getMemOperand(),
15677                                        &IsFast) &&
15678                 IsFast) {
15679               LastIntegerTrunc = true;
15680               LastLegalType = i + 1;
15681             }
15682           }
15683
15684           // We only use vectors if the constant is known to be zero or the
15685           // target allows it and the function is not marked with the
15686           // noimplicitfloat attribute.
15687           if ((!NonZero ||
15688                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15689               !NoVectors) {
15690             // Find a legal type for the vector store.
15691             unsigned Elts = (i + 1) * NumMemElts;
15692             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15693             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15694                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15695                 TLI.allowsMemoryAccess(
15696                     Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15697                 IsFast)
15698               LastLegalVectorType = i + 1;
15699           }
15700         }
15701
15702         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15703         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15704
15705         // Check if we found a legal integer type that creates a meaningful
15706         // merge.
15707         if (NumElem < 2) {
15708           // We know that candidate stores are in order and of correct
15709           // shape. While there is no mergeable sequence from the
15710           // beginning one may start later in the sequence. The only
15711           // reason a merge of size N could have failed where another of
15712           // the same size would not have, is if the alignment has
15713           // improved or we've dropped a non-zero value. Drop as many
15714           // candidates as we can here.
15715           unsigned NumSkip = 1;
15716           while (
15717               (NumSkip < NumConsecutiveStores) &&
15718               (NumSkip < FirstZeroAfterNonZero) &&
15719               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15720             NumSkip++;
15721
15722           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15723           NumConsecutiveStores -= NumSkip;
15724           continue;
15725         }
15726
15727         // Check that we can merge these candidates without causing a cycle.
15728         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15729                                                       RootNode)) {
15730           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15731           NumConsecutiveStores -= NumElem;
15732           continue;
15733         }
15734
15735         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15736                                               UseVector, LastIntegerTrunc);
15737
15738         // Remove merged stores for next iteration.
15739         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15740         NumConsecutiveStores -= NumElem;
15741       }
15742       continue;
15743     }
15744
15745     // When extracting multiple vector elements, try to store them
15746     // in one vector store rather than a sequence of scalar stores.
15747     if (IsExtractVecSrc) {
15748       // Loop on Consecutive Stores on success.
15749       while (NumConsecutiveStores >= 2) {
15750         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15751         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15752         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15753         unsigned NumStoresToMerge = 1;
15754         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15755           // Find a legal type for the vector store.
15756           unsigned Elts = (i + 1) * NumMemElts;
15757           EVT Ty =
15758               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15759           bool IsFast;
15760
15761           // Break early when size is too large to be legal.
15762           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15763             break;
15764
15765           if (TLI.isTypeLegal(Ty) &&
15766               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15767               TLI.allowsMemoryAccess(Context, DL, Ty,
15768                                      *FirstInChain->getMemOperand(), &IsFast) &&
15769               IsFast)
15770             NumStoresToMerge = i + 1;
15771         }
15772
15773         // Check if we found a legal integer type creating a meaningful
15774         // merge.
15775         if (NumStoresToMerge < 2) {
15776           // We know that candidate stores are in order and of correct
15777           // shape. While there is no mergeable sequence from the
15778           // beginning one may start later in the sequence. The only
15779           // reason a merge of size N could have failed where another of
15780           // the same size would not have, is if the alignment has
15781           // improved. Drop as many candidates as we can here.
15782           unsigned NumSkip = 1;
15783           while (
15784               (NumSkip < NumConsecutiveStores) &&
15785               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15786             NumSkip++;
15787
15788           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15789           NumConsecutiveStores -= NumSkip;
15790           continue;
15791         }
15792
15793         // Check that we can merge these candidates without causing a cycle.
15794         if (!checkMergeStoreCandidatesForDependencies(
15795                 StoreNodes, NumStoresToMerge, RootNode)) {
15796           StoreNodes.erase(StoreNodes.begin(),
15797                            StoreNodes.begin() + NumStoresToMerge);
15798           NumConsecutiveStores -= NumStoresToMerge;
15799           continue;
15800         }
15801
15802         RV |= MergeStoresOfConstantsOrVecElts(
15803             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15804
15805         StoreNodes.erase(StoreNodes.begin(),
15806                          StoreNodes.begin() + NumStoresToMerge);
15807         NumConsecutiveStores -= NumStoresToMerge;
15808       }
15809       continue;
15810     }
15811
15812     // Below we handle the case of multiple consecutive stores that
15813     // come from multiple consecutive loads. We merge them into a single
15814     // wide load and a single wide store.
15815
15816     // Look for load nodes which are used by the stored values.
15817     SmallVector<MemOpLink, 8> LoadNodes;
15818
15819     // Find acceptable loads. Loads need to have the same chain (token factor),
15820     // must not be zext, volatile, indexed, and they must be consecutive.
15821     BaseIndexOffset LdBasePtr;
15822
15823     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15824       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15825       SDValue Val = peekThroughBitcasts(St->getValue());
15826       LoadSDNode *Ld = cast<LoadSDNode>(Val);
15827
15828       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15829       // If this is not the first ptr that we check.
15830       int64_t LdOffset = 0;
15831       if (LdBasePtr.getBase().getNode()) {
15832         // The base ptr must be the same.
15833         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15834           break;
15835       } else {
15836         // Check that all other base pointers are the same as this one.
15837         LdBasePtr = LdPtr;
15838       }
15839
15840       // We found a potential memory operand to merge.
15841       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15842     }
15843
15844     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15845       // If we have load/store pair instructions and we only have two values,
15846       // don't bother merging.
15847       unsigned RequiredAlignment;
15848       if (LoadNodes.size() == 2 &&
15849           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15850           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15851         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15852         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15853         break;
15854       }
15855       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15856       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15857       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15858       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15859       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15860
15861       // Scan the memory operations on the chain and find the first
15862       // non-consecutive load memory address. These variables hold the index in
15863       // the store node array.
15864
15865       unsigned LastConsecutiveLoad = 1;
15866
15867       // This variable refers to the size and not index in the array.
15868       unsigned LastLegalVectorType = 1;
15869       unsigned LastLegalIntegerType = 1;
15870       bool isDereferenceable = true;
15871       bool DoIntegerTruncate = false;
15872       StartAddress = LoadNodes[0].OffsetFromBase;
15873       SDValue FirstChain = FirstLoad->getChain();
15874       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15875         // All loads must share the same chain.
15876         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15877           break;
15878
15879         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15880         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15881           break;
15882         LastConsecutiveLoad = i;
15883
15884         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15885           isDereferenceable = false;
15886
15887         // Find a legal type for the vector store.
15888         unsigned Elts = (i + 1) * NumMemElts;
15889         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15890
15891         // Break early when size is too large to be legal.
15892         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15893           break;
15894
15895         bool IsFastSt, IsFastLd;
15896         if (TLI.isTypeLegal(StoreTy) &&
15897             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15898             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15899                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15900             IsFastSt &&
15901             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15902                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15903             IsFastLd) {
15904           LastLegalVectorType = i + 1;
15905         }
15906
15907         // Find a legal type for the integer store.
15908         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15909         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15910         if (TLI.isTypeLegal(StoreTy) &&
15911             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15912             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15913                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15914             IsFastSt &&
15915             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15916                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15917             IsFastLd) {
15918           LastLegalIntegerType = i + 1;
15919           DoIntegerTruncate = false;
15920           // Or check whether a truncstore and extload is legal.
15921         } else if (TLI.getTypeAction(Context, StoreTy) ==
15922                    TargetLowering::TypePromoteInteger) {
15923           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15924           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15925               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15926               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15927                                  StoreTy) &&
15928               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15929                                  StoreTy) &&
15930               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15931               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15932                                      *FirstInChain->getMemOperand(),
15933                                      &IsFastSt) &&
15934               IsFastSt &&
15935               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15936                                      *FirstLoad->getMemOperand(), &IsFastLd) &&
15937               IsFastLd) {
15938             LastLegalIntegerType = i + 1;
15939             DoIntegerTruncate = true;
15940           }
15941         }
15942       }
15943
15944       // Only use vector types if the vector type is larger than the integer
15945       // type. If they are the same, use integers.
15946       bool UseVectorTy =
15947           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15948       unsigned LastLegalType =
15949           std::max(LastLegalVectorType, LastLegalIntegerType);
15950
15951       // We add +1 here because the LastXXX variables refer to location while
15952       // the NumElem refers to array/index size.
15953       unsigned NumElem =
15954           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15955       NumElem = std::min(LastLegalType, NumElem);
15956
15957       if (NumElem < 2) {
15958         // We know that candidate stores are in order and of correct
15959         // shape. While there is no mergeable sequence from the
15960         // beginning one may start later in the sequence. The only
15961         // reason a merge of size N could have failed where another of
15962         // the same size would not have is if the alignment or either
15963         // the load or store has improved. Drop as many candidates as we
15964         // can here.
15965         unsigned NumSkip = 1;
15966         while ((NumSkip < LoadNodes.size()) &&
15967                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15968                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15969           NumSkip++;
15970         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15971         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15972         NumConsecutiveStores -= NumSkip;
15973         continue;
15974       }
15975
15976       // Check that we can merge these candidates without causing a cycle.
15977       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15978                                                     RootNode)) {
15979         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15980         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15981         NumConsecutiveStores -= NumElem;
15982         continue;
15983       }
15984
15985       // Find if it is better to use vectors or integers to load and store
15986       // to memory.
15987       EVT JointMemOpVT;
15988       if (UseVectorTy) {
15989         // Find a legal type for the vector store.
15990         unsigned Elts = NumElem * NumMemElts;
15991         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15992       } else {
15993         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15994         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15995       }
15996
15997       SDLoc LoadDL(LoadNodes[0].MemNode);
15998       SDLoc StoreDL(StoreNodes[0].MemNode);
15999
16000       // The merged loads are required to have the same incoming chain, so
16001       // using the first's chain is acceptable.
16002
16003       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16004       AddToWorklist(NewStoreChain.getNode());
16005
16006       MachineMemOperand::Flags LdMMOFlags =
16007           isDereferenceable ? MachineMemOperand::MODereferenceable
16008                             : MachineMemOperand::MONone;
16009       if (IsNonTemporalLoad)
16010         LdMMOFlags |= MachineMemOperand::MONonTemporal;
16011
16012       MachineMemOperand::Flags StMMOFlags =
16013           IsNonTemporalStore ? MachineMemOperand::MONonTemporal
16014                              : MachineMemOperand::MONone;
16015
16016       SDValue NewLoad, NewStore;
16017       if (UseVectorTy || !DoIntegerTruncate) {
16018         NewLoad =
16019             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
16020                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16021                         FirstLoadAlign, LdMMOFlags);
16022         NewStore = DAG.getStore(
16023             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
16024             FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16025       } else { // This must be the truncstore/extload case
16026         EVT ExtendedTy =
16027             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16028         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16029                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
16030                                  FirstLoad->getPointerInfo(), JointMemOpVT,
16031                                  FirstLoadAlign, LdMMOFlags);
16032         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16033                                      FirstInChain->getBasePtr(),
16034                                      FirstInChain->getPointerInfo(),
16035                                      JointMemOpVT, FirstInChain->getAlignment(),
16036                                      FirstInChain->getMemOperand()->getFlags());
16037       }
16038
16039       // Transfer chain users from old loads to the new load.
16040       for (unsigned i = 0; i < NumElem; ++i) {
16041         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16042         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16043                                       SDValue(NewLoad.getNode(), 1));
16044       }
16045
16046       // Replace the all stores with the new store. Recursively remove
16047       // corresponding value if its no longer used.
16048       for (unsigned i = 0; i < NumElem; ++i) {
16049         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16050         CombineTo(StoreNodes[i].MemNode, NewStore);
16051         if (Val.getNode()->use_empty())
16052           recursivelyDeleteUnusedNodes(Val.getNode());
16053       }
16054
16055       RV = true;
16056       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16057       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16058       NumConsecutiveStores -= NumElem;
16059     }
16060   }
16061   return RV;
16062 }
16063
16064 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16065   SDLoc SL(ST);
16066   SDValue ReplStore;
16067
16068   // Replace the chain to avoid dependency.
16069   if (ST->isTruncatingStore()) {
16070     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16071                                   ST->getBasePtr(), ST->getMemoryVT(),
16072                                   ST->getMemOperand());
16073   } else {
16074     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16075                              ST->getMemOperand());
16076   }
16077
16078   // Create token to keep both nodes around.
16079   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16080                               MVT::Other, ST->getChain(), ReplStore);
16081
16082   // Make sure the new and old chains are cleaned up.
16083   AddToWorklist(Token.getNode());
16084
16085   // Don't add users to work list.
16086   return CombineTo(ST, Token, false);
16087 }
16088
16089 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16090   SDValue Value = ST->getValue();
16091   if (Value.getOpcode() == ISD::TargetConstantFP)
16092     return SDValue();
16093
16094   SDLoc DL(ST);
16095
16096   SDValue Chain = ST->getChain();
16097   SDValue Ptr = ST->getBasePtr();
16098
16099   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16100
16101   // NOTE: If the original store is volatile, this transform must not increase
16102   // the number of stores.  For example, on x86-32 an f64 can be stored in one
16103   // processor operation but an i64 (which is not legal) requires two.  So the
16104   // transform should not be done in this case.
16105
16106   SDValue Tmp;
16107   switch (CFP->getSimpleValueType(0).SimpleTy) {
16108   default:
16109     llvm_unreachable("Unknown FP type");
16110   case MVT::f16:    // We don't do this for these yet.
16111   case MVT::f80:
16112   case MVT::f128:
16113   case MVT::ppcf128:
16114     return SDValue();
16115   case MVT::f32:
16116     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
16117         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16118       ;
16119       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16120                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16121                             MVT::i32);
16122       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16123     }
16124
16125     return SDValue();
16126   case MVT::f64:
16127     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16128          !ST->isVolatile()) ||
16129         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16130       ;
16131       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16132                             getZExtValue(), SDLoc(CFP), MVT::i64);
16133       return DAG.getStore(Chain, DL, Tmp,
16134                           Ptr, ST->getMemOperand());
16135     }
16136
16137     if (!ST->isVolatile() &&
16138         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16139       // Many FP stores are not made apparent until after legalize, e.g. for
16140       // argument passing.  Since this is so common, custom legalize the
16141       // 64-bit integer store into two 32-bit stores.
16142       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16143       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16144       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16145       if (DAG.getDataLayout().isBigEndian())
16146         std::swap(Lo, Hi);
16147
16148       unsigned Alignment = ST->getAlignment();
16149       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16150       AAMDNodes AAInfo = ST->getAAInfo();
16151
16152       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16153                                  ST->getAlignment(), MMOFlags, AAInfo);
16154       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16155                         DAG.getConstant(4, DL, Ptr.getValueType()));
16156       Alignment = MinAlign(Alignment, 4U);
16157       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16158                                  ST->getPointerInfo().getWithOffset(4),
16159                                  Alignment, MMOFlags, AAInfo);
16160       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16161                          St0, St1);
16162     }
16163
16164     return SDValue();
16165   }
16166 }
16167
16168 SDValue DAGCombiner::visitSTORE(SDNode *N) {
16169   StoreSDNode *ST  = cast<StoreSDNode>(N);
16170   SDValue Chain = ST->getChain();
16171   SDValue Value = ST->getValue();
16172   SDValue Ptr   = ST->getBasePtr();
16173
16174   // If this is a store of a bit convert, store the input value if the
16175   // resultant store does not need a higher alignment than the original.
16176   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16177       ST->isUnindexed()) {
16178     EVT SVT = Value.getOperand(0).getValueType();
16179     // If the store is volatile, we only want to change the store type if the
16180     // resulting store is legal. Otherwise we might increase the number of
16181     // memory accesses. We don't care if the original type was legal or not
16182     // as we assume software couldn't rely on the number of accesses of an
16183     // illegal type.
16184     if (((!LegalOperations && !ST->isVolatile()) ||
16185          TLI.isOperationLegal(ISD::STORE, SVT)) &&
16186         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
16187                                      DAG, *ST->getMemOperand())) {
16188       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
16189                           ST->getPointerInfo(), ST->getAlignment(),
16190                           ST->getMemOperand()->getFlags(), ST->getAAInfo());
16191     }
16192   }
16193
16194   // Turn 'store undef, Ptr' -> nothing.
16195   if (Value.isUndef() && ST->isUnindexed())
16196     return Chain;
16197
16198   // Try to infer better alignment information than the store already has.
16199   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
16200     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
16201       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
16202         SDValue NewStore =
16203             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
16204                               ST->getMemoryVT(), Align,
16205                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
16206         // NewStore will always be N as we are only refining the alignment
16207         assert(NewStore.getNode() == N);
16208         (void)NewStore;
16209       }
16210     }
16211   }
16212
16213   // Try transforming a pair floating point load / store ops to integer
16214   // load / store ops.
16215   if (SDValue NewST = TransformFPLoadStorePair(N))
16216     return NewST;
16217
16218   // Try transforming several stores into STORE (BSWAP).
16219   if (SDValue Store = MatchStoreCombine(ST))
16220     return Store;
16221
16222   if (ST->isUnindexed()) {
16223     // Walk up chain skipping non-aliasing memory nodes, on this store and any
16224     // adjacent stores.
16225     if (findBetterNeighborChains(ST)) {
16226       // replaceStoreChain uses CombineTo, which handled all of the worklist
16227       // manipulation. Return the original node to not do anything else.
16228       return SDValue(ST, 0);
16229     }
16230     Chain = ST->getChain();
16231   }
16232
16233   // FIXME: is there such a thing as a truncating indexed store?
16234   if (ST->isTruncatingStore() && ST->isUnindexed() &&
16235       Value.getValueType().isInteger() &&
16236       (!isa<ConstantSDNode>(Value) ||
16237        !cast<ConstantSDNode>(Value)->isOpaque())) {
16238     APInt TruncDemandedBits =
16239         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
16240                              ST->getMemoryVT().getScalarSizeInBits());
16241
16242     // See if we can simplify the input to this truncstore with knowledge that
16243     // only the low bits are being used.  For example:
16244     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
16245     SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
16246     AddToWorklist(Value.getNode());
16247     if (Shorter)
16248       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
16249                                ST->getMemOperand());
16250
16251     // Otherwise, see if we can simplify the operation with
16252     // SimplifyDemandedBits, which only works if the value has a single use.
16253     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
16254       // Re-visit the store if anything changed and the store hasn't been merged
16255       // with another node (N is deleted) SimplifyDemandedBits will add Value's
16256       // node back to the worklist if necessary, but we also need to re-visit
16257       // the Store node itself.
16258       if (N->getOpcode() != ISD::DELETED_NODE)
16259         AddToWorklist(N);
16260       return SDValue(N, 0);
16261     }
16262   }
16263
16264   // If this is a load followed by a store to the same location, then the store
16265   // is dead/noop.
16266   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
16267     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
16268         ST->isUnindexed() && !ST->isVolatile() &&
16269         // There can't be any side effects between the load and store, such as
16270         // a call or store.
16271         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
16272       // The store is dead, remove it.
16273       return Chain;
16274     }
16275   }
16276
16277   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
16278     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
16279         !ST1->isVolatile()) {
16280       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
16281           ST->getMemoryVT() == ST1->getMemoryVT()) {
16282         // If this is a store followed by a store with the same value to the
16283         // same location, then the store is dead/noop.
16284         return Chain;
16285       }
16286
16287       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
16288           !ST1->getBasePtr().isUndef()) {
16289         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
16290         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
16291         unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
16292         unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
16293         // If this is a store who's preceding store to a subset of the current
16294         // location and no one other node is chained to that store we can
16295         // effectively drop the store. Do not remove stores to undef as they may
16296         // be used as data sinks.
16297         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
16298           CombineTo(ST1, ST1->getChain());
16299           return SDValue();
16300         }
16301
16302         // If ST stores to a subset of preceding store's write set, we may be
16303         // able to fold ST's value into the preceding stored value. As we know
16304         // the other uses of ST1's chain are unconcerned with ST, this folding
16305         // will not affect those nodes.
16306         int64_t BitOffset;
16307         if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
16308                                BitOffset)) {
16309           SDValue ChainValue = ST1->getValue();
16310           if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
16311             if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
16312               APInt Val = C1->getAPIntValue();
16313               APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
16314               // FIXME: Handle Big-endian mode.
16315               if (!DAG.getDataLayout().isBigEndian()) {
16316                 Val.insertBits(InsertVal, BitOffset);
16317                 SDValue NewSDVal =
16318                     DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
16319                                     C1->isTargetOpcode(), C1->isOpaque());
16320                 SDNode *NewST1 = DAG.UpdateNodeOperands(
16321                     ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
16322                     ST1->getOperand(3));
16323                 return CombineTo(ST, SDValue(NewST1, 0));
16324               }
16325             }
16326           }
16327         } // End ST subset of ST1 case.
16328       }
16329     }
16330   }
16331
16332   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
16333   // truncating store.  We can do this even if this is already a truncstore.
16334   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
16335       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
16336       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
16337                             ST->getMemoryVT())) {
16338     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
16339                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
16340   }
16341
16342   // Always perform this optimization before types are legal. If the target
16343   // prefers, also try this after legalization to catch stores that were created
16344   // by intrinsics or other nodes.
16345   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
16346     while (true) {
16347       // There can be multiple store sequences on the same chain.
16348       // Keep trying to merge store sequences until we are unable to do so
16349       // or until we merge the last store on the chain.
16350       bool Changed = MergeConsecutiveStores(ST);
16351       if (!Changed) break;
16352       // Return N as merge only uses CombineTo and no worklist clean
16353       // up is necessary.
16354       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
16355         return SDValue(N, 0);
16356     }
16357   }
16358
16359   // Try transforming N to an indexed store.
16360   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16361     return SDValue(N, 0);
16362
16363   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
16364   //
16365   // Make sure to do this only after attempting to merge stores in order to
16366   //  avoid changing the types of some subset of stores due to visit order,
16367   //  preventing their merging.
16368   if (isa<ConstantFPSDNode>(ST->getValue())) {
16369     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
16370       return NewSt;
16371   }
16372
16373   if (SDValue NewSt = splitMergedValStore(ST))
16374     return NewSt;
16375
16376   return ReduceLoadOpStoreWidth(N);
16377 }
16378
16379 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16380   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16381   if (!LifetimeEnd->hasOffset())
16382     return SDValue();
16383
16384   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16385                                         LifetimeEnd->getOffset(), false);
16386
16387   // We walk up the chains to find stores.
16388   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16389   while (!Chains.empty()) {
16390     SDValue Chain = Chains.back();
16391     Chains.pop_back();
16392     if (!Chain.hasOneUse())
16393       continue;
16394     switch (Chain.getOpcode()) {
16395     case ISD::TokenFactor:
16396       for (unsigned Nops = Chain.getNumOperands(); Nops;)
16397         Chains.push_back(Chain.getOperand(--Nops));
16398       break;
16399     case ISD::LIFETIME_START:
16400     case ISD::LIFETIME_END:
16401       // We can forward past any lifetime start/end that can be proven not to
16402       // alias the node.
16403       if (!isAlias(Chain.getNode(), N))
16404         Chains.push_back(Chain.getOperand(0));
16405       break;
16406     case ISD::STORE: {
16407       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
16408       if (ST->isVolatile() || ST->isIndexed())
16409         continue;
16410       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16411       // If we store purely within object bounds just before its lifetime ends,
16412       // we can remove the store.
16413       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16414                                    ST->getMemoryVT().getStoreSizeInBits())) {
16415         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16416                    dbgs() << "\nwithin LIFETIME_END of : ";
16417                    LifetimeEndBase.dump(); dbgs() << "\n");
16418         CombineTo(ST, ST->getChain());
16419         return SDValue(N, 0);
16420       }
16421     }
16422     }
16423   }
16424   return SDValue();
16425 }
16426
16427 /// For the instruction sequence of store below, F and I values
16428 /// are bundled together as an i64 value before being stored into memory.
16429 /// Sometimes it is more efficent to generate separate stores for F and I,
16430 /// which can remove the bitwise instructions or sink them to colder places.
16431 ///
16432 ///   (store (or (zext (bitcast F to i32) to i64),
16433 ///              (shl (zext I to i64), 32)), addr)  -->
16434 ///   (store F, addr) and (store I, addr+4)
16435 ///
16436 /// Similarly, splitting for other merged store can also be beneficial, like:
16437 /// For pair of {i32, i32}, i64 store --> two i32 stores.
16438 /// For pair of {i32, i16}, i64 store --> two i32 stores.
16439 /// For pair of {i16, i16}, i32 store --> two i16 stores.
16440 /// For pair of {i16, i8},  i32 store --> two i16 stores.
16441 /// For pair of {i8, i8},   i16 store --> two i8 stores.
16442 ///
16443 /// We allow each target to determine specifically which kind of splitting is
16444 /// supported.
16445 ///
16446 /// The store patterns are commonly seen from the simple code snippet below
16447 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16448 ///   void goo(const std::pair<int, float> &);
16449 ///   hoo() {
16450 ///     ...
16451 ///     goo(std::make_pair(tmp, ftmp));
16452 ///     ...
16453 ///   }
16454 ///
16455 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16456   if (OptLevel == CodeGenOpt::None)
16457     return SDValue();
16458
16459   SDValue Val = ST->getValue();
16460   SDLoc DL(ST);
16461
16462   // Match OR operand.
16463   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16464     return SDValue();
16465
16466   // Match SHL operand and get Lower and Higher parts of Val.
16467   SDValue Op1 = Val.getOperand(0);
16468   SDValue Op2 = Val.getOperand(1);
16469   SDValue Lo, Hi;
16470   if (Op1.getOpcode() != ISD::SHL) {
16471     std::swap(Op1, Op2);
16472     if (Op1.getOpcode() != ISD::SHL)
16473       return SDValue();
16474   }
16475   Lo = Op2;
16476   Hi = Op1.getOperand(0);
16477   if (!Op1.hasOneUse())
16478     return SDValue();
16479
16480   // Match shift amount to HalfValBitSize.
16481   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16482   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16483   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16484     return SDValue();
16485
16486   // Lo and Hi are zero-extended from int with size less equal than 32
16487   // to i64.
16488   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16489       !Lo.getOperand(0).getValueType().isScalarInteger() ||
16490       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16491       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16492       !Hi.getOperand(0).getValueType().isScalarInteger() ||
16493       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16494     return SDValue();
16495
16496   // Use the EVT of low and high parts before bitcast as the input
16497   // of target query.
16498   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16499                   ? Lo.getOperand(0).getValueType()
16500                   : Lo.getValueType();
16501   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16502                    ? Hi.getOperand(0).getValueType()
16503                    : Hi.getValueType();
16504   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16505     return SDValue();
16506
16507   // Start to split store.
16508   unsigned Alignment = ST->getAlignment();
16509   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16510   AAMDNodes AAInfo = ST->getAAInfo();
16511
16512   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16513   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16514   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16515   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16516
16517   SDValue Chain = ST->getChain();
16518   SDValue Ptr = ST->getBasePtr();
16519   // Lower value store.
16520   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16521                              ST->getAlignment(), MMOFlags, AAInfo);
16522   Ptr =
16523       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16524                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16525   // Higher value store.
16526   SDValue St1 =
16527       DAG.getStore(St0, DL, Hi, Ptr,
16528                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16529                    Alignment / 2, MMOFlags, AAInfo);
16530   return St1;
16531 }
16532
16533 /// Convert a disguised subvector insertion into a shuffle:
16534 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
16535 /// bitcast(shuffle (bitcast V), (extended X), Mask)
16536 /// Note: We do not use an insert_subvector node because that requires a legal
16537 /// subvector type.
16538 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16539   SDValue InsertVal = N->getOperand(1);
16540   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16541       !InsertVal.getOperand(0).getValueType().isVector())
16542     return SDValue();
16543
16544   SDValue SubVec = InsertVal.getOperand(0);
16545   SDValue DestVec = N->getOperand(0);
16546   EVT SubVecVT = SubVec.getValueType();
16547   EVT VT = DestVec.getValueType();
16548   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16549   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16550   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16551
16552   // Step 1: Create a shuffle mask that implements this insert operation. The
16553   // vector that we are inserting into will be operand 0 of the shuffle, so
16554   // those elements are just 'i'. The inserted subvector is in the first
16555   // positions of operand 1 of the shuffle. Example:
16556   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16557   SmallVector<int, 16> Mask(NumMaskVals);
16558   for (unsigned i = 0; i != NumMaskVals; ++i) {
16559     if (i / NumSrcElts == InsIndex)
16560       Mask[i] = (i % NumSrcElts) + NumMaskVals;
16561     else
16562       Mask[i] = i;
16563   }
16564
16565   // Bail out if the target can not handle the shuffle we want to create.
16566   EVT SubVecEltVT = SubVecVT.getVectorElementType();
16567   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16568   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16569     return SDValue();
16570
16571   // Step 2: Create a wide vector from the inserted source vector by appending
16572   // undefined elements. This is the same size as our destination vector.
16573   SDLoc DL(N);
16574   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16575   ConcatOps[0] = SubVec;
16576   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16577
16578   // Step 3: Shuffle in the padded subvector.
16579   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16580   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16581   AddToWorklist(PaddedSubV.getNode());
16582   AddToWorklist(DestVecBC.getNode());
16583   AddToWorklist(Shuf.getNode());
16584   return DAG.getBitcast(VT, Shuf);
16585 }
16586
16587 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16588   SDValue InVec = N->getOperand(0);
16589   SDValue InVal = N->getOperand(1);
16590   SDValue EltNo = N->getOperand(2);
16591   SDLoc DL(N);
16592
16593   // If the inserted element is an UNDEF, just use the input vector.
16594   if (InVal.isUndef())
16595     return InVec;
16596
16597   EVT VT = InVec.getValueType();
16598   unsigned NumElts = VT.getVectorNumElements();
16599
16600   // Remove redundant insertions:
16601   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16602   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16603       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16604     return InVec;
16605
16606   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16607   if (!IndexC) {
16608     // If this is variable insert to undef vector, it might be better to splat:
16609     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16610     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16611       SmallVector<SDValue, 8> Ops(NumElts, InVal);
16612       return DAG.getBuildVector(VT, DL, Ops);
16613     }
16614     return SDValue();
16615   }
16616
16617   // We must know which element is being inserted for folds below here.
16618   unsigned Elt = IndexC->getZExtValue();
16619   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16620     return Shuf;
16621
16622   // Canonicalize insert_vector_elt dag nodes.
16623   // Example:
16624   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16625   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16626   //
16627   // Do this only if the child insert_vector node has one use; also
16628   // do this only if indices are both constants and Idx1 < Idx0.
16629   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16630       && isa<ConstantSDNode>(InVec.getOperand(2))) {
16631     unsigned OtherElt = InVec.getConstantOperandVal(2);
16632     if (Elt < OtherElt) {
16633       // Swap nodes.
16634       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16635                                   InVec.getOperand(0), InVal, EltNo);
16636       AddToWorklist(NewOp.getNode());
16637       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16638                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16639     }
16640   }
16641
16642   // If we can't generate a legal BUILD_VECTOR, exit
16643   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16644     return SDValue();
16645
16646   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16647   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16648   // vector elements.
16649   SmallVector<SDValue, 8> Ops;
16650   // Do not combine these two vectors if the output vector will not replace
16651   // the input vector.
16652   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16653     Ops.append(InVec.getNode()->op_begin(),
16654                InVec.getNode()->op_end());
16655   } else if (InVec.isUndef()) {
16656     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16657   } else {
16658     return SDValue();
16659   }
16660   assert(Ops.size() == NumElts && "Unexpected vector size");
16661
16662   // Insert the element
16663   if (Elt < Ops.size()) {
16664     // All the operands of BUILD_VECTOR must have the same type;
16665     // we enforce that here.
16666     EVT OpVT = Ops[0].getValueType();
16667     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16668   }
16669
16670   // Return the new vector
16671   return DAG.getBuildVector(VT, DL, Ops);
16672 }
16673
16674 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16675                                                   SDValue EltNo,
16676                                                   LoadSDNode *OriginalLoad) {
16677   assert(!OriginalLoad->isVolatile());
16678
16679   EVT ResultVT = EVE->getValueType(0);
16680   EVT VecEltVT = InVecVT.getVectorElementType();
16681   unsigned Align = OriginalLoad->getAlignment();
16682   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16683       VecEltVT.getTypeForEVT(*DAG.getContext()));
16684
16685   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16686     return SDValue();
16687
16688   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16689     ISD::NON_EXTLOAD : ISD::EXTLOAD;
16690   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16691     return SDValue();
16692
16693   Align = NewAlign;
16694
16695   SDValue NewPtr = OriginalLoad->getBasePtr();
16696   SDValue Offset;
16697   EVT PtrType = NewPtr.getValueType();
16698   MachinePointerInfo MPI;
16699   SDLoc DL(EVE);
16700   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16701     int Elt = ConstEltNo->getZExtValue();
16702     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16703     Offset = DAG.getConstant(PtrOff, DL, PtrType);
16704     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16705   } else {
16706     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16707     Offset = DAG.getNode(
16708         ISD::MUL, DL, PtrType, Offset,
16709         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16710     // Discard the pointer info except the address space because the memory
16711     // operand can't represent this new access since the offset is variable.
16712     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16713   }
16714   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16715
16716   // The replacement we need to do here is a little tricky: we need to
16717   // replace an extractelement of a load with a load.
16718   // Use ReplaceAllUsesOfValuesWith to do the replacement.
16719   // Note that this replacement assumes that the extractvalue is the only
16720   // use of the load; that's okay because we don't want to perform this
16721   // transformation in other cases anyway.
16722   SDValue Load;
16723   SDValue Chain;
16724   if (ResultVT.bitsGT(VecEltVT)) {
16725     // If the result type of vextract is wider than the load, then issue an
16726     // extending load instead.
16727     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16728                                                   VecEltVT)
16729                                    ? ISD::ZEXTLOAD
16730                                    : ISD::EXTLOAD;
16731     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16732                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16733                           Align, OriginalLoad->getMemOperand()->getFlags(),
16734                           OriginalLoad->getAAInfo());
16735     Chain = Load.getValue(1);
16736   } else {
16737     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16738                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16739                        OriginalLoad->getAAInfo());
16740     Chain = Load.getValue(1);
16741     if (ResultVT.bitsLT(VecEltVT))
16742       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16743     else
16744       Load = DAG.getBitcast(ResultVT, Load);
16745   }
16746   WorklistRemover DeadNodes(*this);
16747   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16748   SDValue To[] = { Load, Chain };
16749   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16750   // Since we're explicitly calling ReplaceAllUses, add the new node to the
16751   // worklist explicitly as well.
16752   AddToWorklist(Load.getNode());
16753   AddUsersToWorklist(Load.getNode()); // Add users too
16754   // Make sure to revisit this node to clean it up; it will usually be dead.
16755   AddToWorklist(EVE);
16756   ++OpsNarrowed;
16757   return SDValue(EVE, 0);
16758 }
16759
16760 /// Transform a vector binary operation into a scalar binary operation by moving
16761 /// the math/logic after an extract element of a vector.
16762 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16763                                        bool LegalOperations) {
16764   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16765   SDValue Vec = ExtElt->getOperand(0);
16766   SDValue Index = ExtElt->getOperand(1);
16767   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16768   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
16769       Vec.getNode()->getNumValues() != 1)
16770     return SDValue();
16771
16772   // Targets may want to avoid this to prevent an expensive register transfer.
16773   if (!TLI.shouldScalarizeBinop(Vec))
16774     return SDValue();
16775
16776   // Extracting an element of a vector constant is constant-folded, so this
16777   // transform is just replacing a vector op with a scalar op while moving the
16778   // extract.
16779   SDValue Op0 = Vec.getOperand(0);
16780   SDValue Op1 = Vec.getOperand(1);
16781   if (isAnyConstantBuildVector(Op0, true) ||
16782       isAnyConstantBuildVector(Op1, true)) {
16783     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16784     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16785     SDLoc DL(ExtElt);
16786     EVT VT = ExtElt->getValueType(0);
16787     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16788     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16789     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16790   }
16791
16792   return SDValue();
16793 }
16794
16795 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16796   SDValue VecOp = N->getOperand(0);
16797   SDValue Index = N->getOperand(1);
16798   EVT ScalarVT = N->getValueType(0);
16799   EVT VecVT = VecOp.getValueType();
16800   if (VecOp.isUndef())
16801     return DAG.getUNDEF(ScalarVT);
16802
16803   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16804   //
16805   // This only really matters if the index is non-constant since other combines
16806   // on the constant elements already work.
16807   SDLoc DL(N);
16808   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16809       Index == VecOp.getOperand(2)) {
16810     SDValue Elt = VecOp.getOperand(1);
16811     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16812   }
16813
16814   // (vextract (scalar_to_vector val, 0) -> val
16815   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16816     // Check if the result type doesn't match the inserted element type. A
16817     // SCALAR_TO_VECTOR may truncate the inserted element and the
16818     // EXTRACT_VECTOR_ELT may widen the extracted vector.
16819     SDValue InOp = VecOp.getOperand(0);
16820     if (InOp.getValueType() != ScalarVT) {
16821       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16822       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16823     }
16824     return InOp;
16825   }
16826
16827   // extract_vector_elt of out-of-bounds element -> UNDEF
16828   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16829   unsigned NumElts = VecVT.getVectorNumElements();
16830   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16831     return DAG.getUNDEF(ScalarVT);
16832
16833   // extract_vector_elt (build_vector x, y), 1 -> y
16834   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16835       TLI.isTypeLegal(VecVT) &&
16836       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16837     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16838     EVT InEltVT = Elt.getValueType();
16839
16840     // Sometimes build_vector's scalar input types do not match result type.
16841     if (ScalarVT == InEltVT)
16842       return Elt;
16843
16844     // TODO: It may be useful to truncate if free if the build_vector implicitly
16845     // converts.
16846   }
16847
16848   // TODO: These transforms should not require the 'hasOneUse' restriction, but
16849   // there are regressions on multiple targets without it. We can end up with a
16850   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16851   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16852       VecOp.hasOneUse()) {
16853     // The vector index of the LSBs of the source depend on the endian-ness.
16854     bool IsLE = DAG.getDataLayout().isLittleEndian();
16855     unsigned ExtractIndex = IndexC->getZExtValue();
16856     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16857     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16858     SDValue BCSrc = VecOp.getOperand(0);
16859     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16860       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16861
16862     if (LegalTypes && BCSrc.getValueType().isInteger() &&
16863         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16864       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16865       // trunc i64 X to i32
16866       SDValue X = BCSrc.getOperand(0);
16867       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16868              "Extract element and scalar to vector can't change element type "
16869              "from FP to integer.");
16870       unsigned XBitWidth = X.getValueSizeInBits();
16871       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16872       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16873
16874       // An extract element return value type can be wider than its vector
16875       // operand element type. In that case, the high bits are undefined, so
16876       // it's possible that we may need to extend rather than truncate.
16877       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16878         assert(XBitWidth % VecEltBitWidth == 0 &&
16879                "Scalar bitwidth must be a multiple of vector element bitwidth");
16880         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16881       }
16882     }
16883   }
16884
16885   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16886     return BO;
16887
16888   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16889   // We only perform this optimization before the op legalization phase because
16890   // we may introduce new vector instructions which are not backed by TD
16891   // patterns. For example on AVX, extracting elements from a wide vector
16892   // without using extract_subvector. However, if we can find an underlying
16893   // scalar value, then we can always use that.
16894   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16895     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16896     // Find the new index to extract from.
16897     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16898
16899     // Extracting an undef index is undef.
16900     if (OrigElt == -1)
16901       return DAG.getUNDEF(ScalarVT);
16902
16903     // Select the right vector half to extract from.
16904     SDValue SVInVec;
16905     if (OrigElt < (int)NumElts) {
16906       SVInVec = VecOp.getOperand(0);
16907     } else {
16908       SVInVec = VecOp.getOperand(1);
16909       OrigElt -= NumElts;
16910     }
16911
16912     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16913       SDValue InOp = SVInVec.getOperand(OrigElt);
16914       if (InOp.getValueType() != ScalarVT) {
16915         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16916         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16917       }
16918
16919       return InOp;
16920     }
16921
16922     // FIXME: We should handle recursing on other vector shuffles and
16923     // scalar_to_vector here as well.
16924
16925     if (!LegalOperations ||
16926         // FIXME: Should really be just isOperationLegalOrCustom.
16927         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16928         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16929       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16930       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16931                          DAG.getConstant(OrigElt, DL, IndexTy));
16932     }
16933   }
16934
16935   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16936   // simplify it based on the (valid) extraction indices.
16937   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16938         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16939                Use->getOperand(0) == VecOp &&
16940                isa<ConstantSDNode>(Use->getOperand(1));
16941       })) {
16942     APInt DemandedElts = APInt::getNullValue(NumElts);
16943     for (SDNode *Use : VecOp->uses()) {
16944       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16945       if (CstElt->getAPIntValue().ult(NumElts))
16946         DemandedElts.setBit(CstElt->getZExtValue());
16947     }
16948     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16949       // We simplified the vector operand of this extract element. If this
16950       // extract is not dead, visit it again so it is folded properly.
16951       if (N->getOpcode() != ISD::DELETED_NODE)
16952         AddToWorklist(N);
16953       return SDValue(N, 0);
16954     }
16955   }
16956
16957   // Everything under here is trying to match an extract of a loaded value.
16958   // If the result of load has to be truncated, then it's not necessarily
16959   // profitable.
16960   bool BCNumEltsChanged = false;
16961   EVT ExtVT = VecVT.getVectorElementType();
16962   EVT LVT = ExtVT;
16963   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16964     return SDValue();
16965
16966   if (VecOp.getOpcode() == ISD::BITCAST) {
16967     // Don't duplicate a load with other uses.
16968     if (!VecOp.hasOneUse())
16969       return SDValue();
16970
16971     EVT BCVT = VecOp.getOperand(0).getValueType();
16972     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16973       return SDValue();
16974     if (NumElts != BCVT.getVectorNumElements())
16975       BCNumEltsChanged = true;
16976     VecOp = VecOp.getOperand(0);
16977     ExtVT = BCVT.getVectorElementType();
16978   }
16979
16980   // extract (vector load $addr), i --> load $addr + i * size
16981   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16982       ISD::isNormalLoad(VecOp.getNode()) &&
16983       !Index->hasPredecessor(VecOp.getNode())) {
16984     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16985     if (VecLoad && !VecLoad->isVolatile())
16986       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16987   }
16988
16989   // Perform only after legalization to ensure build_vector / vector_shuffle
16990   // optimizations have already been done.
16991   if (!LegalOperations || !IndexC)
16992     return SDValue();
16993
16994   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16995   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16996   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16997   int Elt = IndexC->getZExtValue();
16998   LoadSDNode *LN0 = nullptr;
16999   if (ISD::isNormalLoad(VecOp.getNode())) {
17000     LN0 = cast<LoadSDNode>(VecOp);
17001   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17002              VecOp.getOperand(0).getValueType() == ExtVT &&
17003              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17004     // Don't duplicate a load with other uses.
17005     if (!VecOp.hasOneUse())
17006       return SDValue();
17007
17008     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17009   }
17010   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17011     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17012     // =>
17013     // (load $addr+1*size)
17014
17015     // Don't duplicate a load with other uses.
17016     if (!VecOp.hasOneUse())
17017       return SDValue();
17018
17019     // If the bit convert changed the number of elements, it is unsafe
17020     // to examine the mask.
17021     if (BCNumEltsChanged)
17022       return SDValue();
17023
17024     // Select the input vector, guarding against out of range extract vector.
17025     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17026     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17027
17028     if (VecOp.getOpcode() == ISD::BITCAST) {
17029       // Don't duplicate a load with other uses.
17030       if (!VecOp.hasOneUse())
17031         return SDValue();
17032
17033       VecOp = VecOp.getOperand(0);
17034     }
17035     if (ISD::isNormalLoad(VecOp.getNode())) {
17036       LN0 = cast<LoadSDNode>(VecOp);
17037       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17038       Index = DAG.getConstant(Elt, DL, Index.getValueType());
17039     }
17040   }
17041
17042   // Make sure we found a non-volatile load and the extractelement is
17043   // the only use.
17044   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
17045     return SDValue();
17046
17047   // If Idx was -1 above, Elt is going to be -1, so just return undef.
17048   if (Elt == -1)
17049     return DAG.getUNDEF(LVT);
17050
17051   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17052 }
17053
17054 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
17055 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17056   // We perform this optimization post type-legalization because
17057   // the type-legalizer often scalarizes integer-promoted vectors.
17058   // Performing this optimization before may create bit-casts which
17059   // will be type-legalized to complex code sequences.
17060   // We perform this optimization only before the operation legalizer because we
17061   // may introduce illegal operations.
17062   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
17063     return SDValue();
17064
17065   unsigned NumInScalars = N->getNumOperands();
17066   SDLoc DL(N);
17067   EVT VT = N->getValueType(0);
17068
17069   // Check to see if this is a BUILD_VECTOR of a bunch of values
17070   // which come from any_extend or zero_extend nodes. If so, we can create
17071   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17072   // optimizations. We do not handle sign-extend because we can't fill the sign
17073   // using shuffles.
17074   EVT SourceType = MVT::Other;
17075   bool AllAnyExt = true;
17076
17077   for (unsigned i = 0; i != NumInScalars; ++i) {
17078     SDValue In = N->getOperand(i);
17079     // Ignore undef inputs.
17080     if (In.isUndef()) continue;
17081
17082     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
17083     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17084
17085     // Abort if the element is not an extension.
17086     if (!ZeroExt && !AnyExt) {
17087       SourceType = MVT::Other;
17088       break;
17089     }
17090
17091     // The input is a ZeroExt or AnyExt. Check the original type.
17092     EVT InTy = In.getOperand(0).getValueType();
17093
17094     // Check that all of the widened source types are the same.
17095     if (SourceType == MVT::Other)
17096       // First time.
17097       SourceType = InTy;
17098     else if (InTy != SourceType) {
17099       // Multiple income types. Abort.
17100       SourceType = MVT::Other;
17101       break;
17102     }
17103
17104     // Check if all of the extends are ANY_EXTENDs.
17105     AllAnyExt &= AnyExt;
17106   }
17107
17108   // In order to have valid types, all of the inputs must be extended from the
17109   // same source type and all of the inputs must be any or zero extend.
17110   // Scalar sizes must be a power of two.
17111   EVT OutScalarTy = VT.getScalarType();
17112   bool ValidTypes = SourceType != MVT::Other &&
17113                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
17114                  isPowerOf2_32(SourceType.getSizeInBits());
17115
17116   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17117   // turn into a single shuffle instruction.
17118   if (!ValidTypes)
17119     return SDValue();
17120
17121   bool isLE = DAG.getDataLayout().isLittleEndian();
17122   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17123   assert(ElemRatio > 1 && "Invalid element size ratio");
17124   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
17125                                DAG.getConstant(0, DL, SourceType);
17126
17127   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17128   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17129
17130   // Populate the new build_vector
17131   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17132     SDValue Cast = N->getOperand(i);
17133     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17134             Cast.getOpcode() == ISD::ZERO_EXTEND ||
17135             Cast.isUndef()) && "Invalid cast opcode");
17136     SDValue In;
17137     if (Cast.isUndef())
17138       In = DAG.getUNDEF(SourceType);
17139     else
17140       In = Cast->getOperand(0);
17141     unsigned Index = isLE ? (i * ElemRatio) :
17142                             (i * ElemRatio + (ElemRatio - 1));
17143
17144     assert(Index < Ops.size() && "Invalid index");
17145     Ops[Index] = In;
17146   }
17147
17148   // The type of the new BUILD_VECTOR node.
17149   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17150   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17151          "Invalid vector size");
17152   // Check if the new vector type is legal.
17153   if (!isTypeLegal(VecVT) ||
17154       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17155        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
17156     return SDValue();
17157
17158   // Make the new BUILD_VECTOR.
17159   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17160
17161   // The new BUILD_VECTOR node has the potential to be further optimized.
17162   AddToWorklist(BV.getNode());
17163   // Bitcast to the desired type.
17164   return DAG.getBitcast(VT, BV);
17165 }
17166
17167 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
17168                                            ArrayRef<int> VectorMask,
17169                                            SDValue VecIn1, SDValue VecIn2,
17170                                            unsigned LeftIdx, bool DidSplitVec) {
17171   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17172   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
17173
17174   EVT VT = N->getValueType(0);
17175   EVT InVT1 = VecIn1.getValueType();
17176   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
17177
17178   unsigned NumElems = VT.getVectorNumElements();
17179   unsigned ShuffleNumElems = NumElems;
17180
17181   // If we artificially split a vector in two already, then the offsets in the
17182   // operands will all be based off of VecIn1, even those in VecIn2.
17183   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
17184
17185   // We can't generate a shuffle node with mismatched input and output types.
17186   // Try to make the types match the type of the output.
17187   if (InVT1 != VT || InVT2 != VT) {
17188     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
17189       // If the output vector length is a multiple of both input lengths,
17190       // we can concatenate them and pad the rest with undefs.
17191       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
17192       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
17193       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
17194       ConcatOps[0] = VecIn1;
17195       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
17196       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17197       VecIn2 = SDValue();
17198     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
17199       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
17200         return SDValue();
17201
17202       if (!VecIn2.getNode()) {
17203         // If we only have one input vector, and it's twice the size of the
17204         // output, split it in two.
17205         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
17206                              DAG.getConstant(NumElems, DL, IdxTy));
17207         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
17208         // Since we now have shorter input vectors, adjust the offset of the
17209         // second vector's start.
17210         Vec2Offset = NumElems;
17211       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
17212         // VecIn1 is wider than the output, and we have another, possibly
17213         // smaller input. Pad the smaller input with undefs, shuffle at the
17214         // input vector width, and extract the output.
17215         // The shuffle type is different than VT, so check legality again.
17216         if (LegalOperations &&
17217             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
17218           return SDValue();
17219
17220         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
17221         // lower it back into a BUILD_VECTOR. So if the inserted type is
17222         // illegal, don't even try.
17223         if (InVT1 != InVT2) {
17224           if (!TLI.isTypeLegal(InVT2))
17225             return SDValue();
17226           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
17227                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
17228         }
17229         ShuffleNumElems = NumElems * 2;
17230       } else {
17231         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
17232         // than VecIn1. We can't handle this for now - this case will disappear
17233         // when we start sorting the vectors by type.
17234         return SDValue();
17235       }
17236     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
17237                InVT1.getSizeInBits() == VT.getSizeInBits()) {
17238       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
17239       ConcatOps[0] = VecIn2;
17240       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17241     } else {
17242       // TODO: Support cases where the length mismatch isn't exactly by a
17243       // factor of 2.
17244       // TODO: Move this check upwards, so that if we have bad type
17245       // mismatches, we don't create any DAG nodes.
17246       return SDValue();
17247     }
17248   }
17249
17250   // Initialize mask to undef.
17251   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
17252
17253   // Only need to run up to the number of elements actually used, not the
17254   // total number of elements in the shuffle - if we are shuffling a wider
17255   // vector, the high lanes should be set to undef.
17256   for (unsigned i = 0; i != NumElems; ++i) {
17257     if (VectorMask[i] <= 0)
17258       continue;
17259
17260     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
17261     if (VectorMask[i] == (int)LeftIdx) {
17262       Mask[i] = ExtIndex;
17263     } else if (VectorMask[i] == (int)LeftIdx + 1) {
17264       Mask[i] = Vec2Offset + ExtIndex;
17265     }
17266   }
17267
17268   // The type the input vectors may have changed above.
17269   InVT1 = VecIn1.getValueType();
17270
17271   // If we already have a VecIn2, it should have the same type as VecIn1.
17272   // If we don't, get an undef/zero vector of the appropriate type.
17273   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
17274   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
17275
17276   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
17277   if (ShuffleNumElems > NumElems)
17278     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
17279
17280   return Shuffle;
17281 }
17282
17283 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
17284   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17285
17286   // First, determine where the build vector is not undef.
17287   // TODO: We could extend this to handle zero elements as well as undefs.
17288   int NumBVOps = BV->getNumOperands();
17289   int ZextElt = -1;
17290   for (int i = 0; i != NumBVOps; ++i) {
17291     SDValue Op = BV->getOperand(i);
17292     if (Op.isUndef())
17293       continue;
17294     if (ZextElt == -1)
17295       ZextElt = i;
17296     else
17297       return SDValue();
17298   }
17299   // Bail out if there's no non-undef element.
17300   if (ZextElt == -1)
17301     return SDValue();
17302
17303   // The build vector contains some number of undef elements and exactly
17304   // one other element. That other element must be a zero-extended scalar
17305   // extracted from a vector at a constant index to turn this into a shuffle.
17306   // Also, require that the build vector does not implicitly truncate/extend
17307   // its elements.
17308   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17309   EVT VT = BV->getValueType(0);
17310   SDValue Zext = BV->getOperand(ZextElt);
17311   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17312       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17313       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17314       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
17315     return SDValue();
17316
17317   // The zero-extend must be a multiple of the source size, and we must be
17318   // building a vector of the same size as the source of the extract element.
17319   SDValue Extract = Zext.getOperand(0);
17320   unsigned DestSize = Zext.getValueSizeInBits();
17321   unsigned SrcSize = Extract.getValueSizeInBits();
17322   if (DestSize % SrcSize != 0 ||
17323       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17324     return SDValue();
17325
17326   // Create a shuffle mask that will combine the extracted element with zeros
17327   // and undefs.
17328   int ZextRatio = DestSize / SrcSize;
17329   int NumMaskElts = NumBVOps * ZextRatio;
17330   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17331   for (int i = 0; i != NumMaskElts; ++i) {
17332     if (i / ZextRatio == ZextElt) {
17333       // The low bits of the (potentially translated) extracted element map to
17334       // the source vector. The high bits map to zero. We will use a zero vector
17335       // as the 2nd source operand of the shuffle, so use the 1st element of
17336       // that vector (mask value is number-of-elements) for the high bits.
17337       if (i % ZextRatio == 0)
17338         ShufMask[i] = Extract.getConstantOperandVal(1);
17339       else
17340         ShufMask[i] = NumMaskElts;
17341     }
17342
17343     // Undef elements of the build vector remain undef because we initialize
17344     // the shuffle mask with -1.
17345   }
17346
17347   // Turn this into a shuffle with zero if that's legal.
17348   EVT VecVT = Extract.getOperand(0).getValueType();
17349   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
17350     return SDValue();
17351
17352   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17353   // bitcast (shuffle V, ZeroVec, VectorMask)
17354   SDLoc DL(BV);
17355   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17356   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
17357                                       ShufMask);
17358   return DAG.getBitcast(VT, Shuf);
17359 }
17360
17361 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17362 // operations. If the types of the vectors we're extracting from allow it,
17363 // turn this into a vector_shuffle node.
17364 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17365   SDLoc DL(N);
17366   EVT VT = N->getValueType(0);
17367
17368   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17369   if (!isTypeLegal(VT))
17370     return SDValue();
17371
17372   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
17373     return V;
17374
17375   // May only combine to shuffle after legalize if shuffle is legal.
17376   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
17377     return SDValue();
17378
17379   bool UsesZeroVector = false;
17380   unsigned NumElems = N->getNumOperands();
17381
17382   // Record, for each element of the newly built vector, which input vector
17383   // that element comes from. -1 stands for undef, 0 for the zero vector,
17384   // and positive values for the input vectors.
17385   // VectorMask maps each element to its vector number, and VecIn maps vector
17386   // numbers to their initial SDValues.
17387
17388   SmallVector<int, 8> VectorMask(NumElems, -1);
17389   SmallVector<SDValue, 8> VecIn;
17390   VecIn.push_back(SDValue());
17391
17392   for (unsigned i = 0; i != NumElems; ++i) {
17393     SDValue Op = N->getOperand(i);
17394
17395     if (Op.isUndef())
17396       continue;
17397
17398     // See if we can use a blend with a zero vector.
17399     // TODO: Should we generalize this to a blend with an arbitrary constant
17400     // vector?
17401     if (isNullConstant(Op) || isNullFPConstant(Op)) {
17402       UsesZeroVector = true;
17403       VectorMask[i] = 0;
17404       continue;
17405     }
17406
17407     // Not an undef or zero. If the input is something other than an
17408     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17409     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17410         !isa<ConstantSDNode>(Op.getOperand(1)))
17411       return SDValue();
17412     SDValue ExtractedFromVec = Op.getOperand(0);
17413
17414     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17415     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17416       return SDValue();
17417
17418     // All inputs must have the same element type as the output.
17419     if (VT.getVectorElementType() !=
17420         ExtractedFromVec.getValueType().getVectorElementType())
17421       return SDValue();
17422
17423     // Have we seen this input vector before?
17424     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17425     // a map back from SDValues to numbers isn't worth it.
17426     unsigned Idx = std::distance(
17427         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17428     if (Idx == VecIn.size())
17429       VecIn.push_back(ExtractedFromVec);
17430
17431     VectorMask[i] = Idx;
17432   }
17433
17434   // If we didn't find at least one input vector, bail out.
17435   if (VecIn.size() < 2)
17436     return SDValue();
17437
17438   // If all the Operands of BUILD_VECTOR extract from same
17439   // vector, then split the vector efficiently based on the maximum
17440   // vector access index and adjust the VectorMask and
17441   // VecIn accordingly.
17442   bool DidSplitVec = false;
17443   if (VecIn.size() == 2) {
17444     unsigned MaxIndex = 0;
17445     unsigned NearestPow2 = 0;
17446     SDValue Vec = VecIn.back();
17447     EVT InVT = Vec.getValueType();
17448     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17449     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17450
17451     for (unsigned i = 0; i < NumElems; i++) {
17452       if (VectorMask[i] <= 0)
17453         continue;
17454       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17455       IndexVec[i] = Index;
17456       MaxIndex = std::max(MaxIndex, Index);
17457     }
17458
17459     NearestPow2 = PowerOf2Ceil(MaxIndex);
17460     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
17461         NumElems * 2 < NearestPow2) {
17462       unsigned SplitSize = NearestPow2 / 2;
17463       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17464                                      InVT.getVectorElementType(), SplitSize);
17465       if (TLI.isTypeLegal(SplitVT)) {
17466         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17467                                      DAG.getConstant(SplitSize, DL, IdxTy));
17468         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17469                                      DAG.getConstant(0, DL, IdxTy));
17470         VecIn.pop_back();
17471         VecIn.push_back(VecIn1);
17472         VecIn.push_back(VecIn2);
17473         DidSplitVec = true;
17474
17475         for (unsigned i = 0; i < NumElems; i++) {
17476           if (VectorMask[i] <= 0)
17477             continue;
17478           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
17479         }
17480       }
17481     }
17482   }
17483
17484   // TODO: We want to sort the vectors by descending length, so that adjacent
17485   // pairs have similar length, and the longer vector is always first in the
17486   // pair.
17487
17488   // TODO: Should this fire if some of the input vectors has illegal type (like
17489   // it does now), or should we let legalization run its course first?
17490
17491   // Shuffle phase:
17492   // Take pairs of vectors, and shuffle them so that the result has elements
17493   // from these vectors in the correct places.
17494   // For example, given:
17495   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17496   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17497   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
17498   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
17499   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
17500   // We will generate:
17501   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17502   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17503   SmallVector<SDValue, 4> Shuffles;
17504   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
17505     unsigned LeftIdx = 2 * In + 1;
17506     SDValue VecLeft = VecIn[LeftIdx];
17507     SDValue VecRight =
17508         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
17509
17510     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17511                                                 VecRight, LeftIdx, DidSplitVec))
17512       Shuffles.push_back(Shuffle);
17513     else
17514       return SDValue();
17515   }
17516
17517   // If we need the zero vector as an "ingredient" in the blend tree, add it
17518   // to the list of shuffles.
17519   if (UsesZeroVector)
17520     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
17521                                       : DAG.getConstantFP(0.0, DL, VT));
17522
17523   // If we only have one shuffle, we're done.
17524   if (Shuffles.size() == 1)
17525     return Shuffles[0];
17526
17527   // Update the vector mask to point to the post-shuffle vectors.
17528   for (int &Vec : VectorMask)
17529     if (Vec == 0)
17530       Vec = Shuffles.size() - 1;
17531     else
17532       Vec = (Vec - 1) / 2;
17533
17534   // More than one shuffle. Generate a binary tree of blends, e.g. if from
17535   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17536   // generate:
17537   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17538   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17539   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17540   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17541   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17542   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17543   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17544
17545   // Make sure the initial size of the shuffle list is even.
17546   if (Shuffles.size() % 2)
17547     Shuffles.push_back(DAG.getUNDEF(VT));
17548
17549   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
17550     if (CurSize % 2) {
17551       Shuffles[CurSize] = DAG.getUNDEF(VT);
17552       CurSize++;
17553     }
17554     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
17555       int Left = 2 * In;
17556       int Right = 2 * In + 1;
17557       SmallVector<int, 8> Mask(NumElems, -1);
17558       for (unsigned i = 0; i != NumElems; ++i) {
17559         if (VectorMask[i] == Left) {
17560           Mask[i] = i;
17561           VectorMask[i] = In;
17562         } else if (VectorMask[i] == Right) {
17563           Mask[i] = i + NumElems;
17564           VectorMask[i] = In;
17565         }
17566       }
17567
17568       Shuffles[In] =
17569           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17570     }
17571   }
17572   return Shuffles[0];
17573 }
17574
17575 // Try to turn a build vector of zero extends of extract vector elts into a
17576 // a vector zero extend and possibly an extract subvector.
17577 // TODO: Support sign extend?
17578 // TODO: Allow undef elements?
17579 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17580   if (LegalOperations)
17581     return SDValue();
17582
17583   EVT VT = N->getValueType(0);
17584
17585   bool FoundZeroExtend = false;
17586   SDValue Op0 = N->getOperand(0);
17587   auto checkElem = [&](SDValue Op) -> int64_t {
17588     unsigned Opc = Op.getOpcode();
17589     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17590     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17591         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17592         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17593       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17594         return C->getZExtValue();
17595     return -1;
17596   };
17597
17598   // Make sure the first element matches
17599   // (zext (extract_vector_elt X, C))
17600   int64_t Offset = checkElem(Op0);
17601   if (Offset < 0)
17602     return SDValue();
17603
17604   unsigned NumElems = N->getNumOperands();
17605   SDValue In = Op0.getOperand(0).getOperand(0);
17606   EVT InSVT = In.getValueType().getScalarType();
17607   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17608
17609   // Don't create an illegal input type after type legalization.
17610   if (LegalTypes && !TLI.isTypeLegal(InVT))
17611     return SDValue();
17612
17613   // Ensure all the elements come from the same vector and are adjacent.
17614   for (unsigned i = 1; i != NumElems; ++i) {
17615     if ((Offset + i) != checkElem(N->getOperand(i)))
17616       return SDValue();
17617   }
17618
17619   SDLoc DL(N);
17620   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17621                    Op0.getOperand(0).getOperand(1));
17622   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17623                      VT, In);
17624 }
17625
17626 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17627   EVT VT = N->getValueType(0);
17628
17629   // A vector built entirely of undefs is undef.
17630   if (ISD::allOperandsUndef(N))
17631     return DAG.getUNDEF(VT);
17632
17633   // If this is a splat of a bitcast from another vector, change to a
17634   // concat_vector.
17635   // For example:
17636   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17637   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17638   //
17639   // If X is a build_vector itself, the concat can become a larger build_vector.
17640   // TODO: Maybe this is useful for non-splat too?
17641   if (!LegalOperations) {
17642     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17643       Splat = peekThroughBitcasts(Splat);
17644       EVT SrcVT = Splat.getValueType();
17645       if (SrcVT.isVector()) {
17646         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17647         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17648                                      SrcVT.getVectorElementType(), NumElts);
17649         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17650           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17651           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17652                                        NewVT, Ops);
17653           return DAG.getBitcast(VT, Concat);
17654         }
17655       }
17656     }
17657   }
17658
17659   // Check if we can express BUILD VECTOR via subvector extract.
17660   if (!LegalTypes && (N->getNumOperands() > 1)) {
17661     SDValue Op0 = N->getOperand(0);
17662     auto checkElem = [&](SDValue Op) -> uint64_t {
17663       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17664           (Op0.getOperand(0) == Op.getOperand(0)))
17665         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17666           return CNode->getZExtValue();
17667       return -1;
17668     };
17669
17670     int Offset = checkElem(Op0);
17671     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17672       if (Offset + i != checkElem(N->getOperand(i))) {
17673         Offset = -1;
17674         break;
17675       }
17676     }
17677
17678     if ((Offset == 0) &&
17679         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17680       return Op0.getOperand(0);
17681     if ((Offset != -1) &&
17682         ((Offset % N->getValueType(0).getVectorNumElements()) ==
17683          0)) // IDX must be multiple of output size.
17684       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17685                          Op0.getOperand(0), Op0.getOperand(1));
17686   }
17687
17688   if (SDValue V = convertBuildVecZextToZext(N))
17689     return V;
17690
17691   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17692     return V;
17693
17694   if (SDValue V = reduceBuildVecToShuffle(N))
17695     return V;
17696
17697   return SDValue();
17698 }
17699
17700 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17701   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17702   EVT OpVT = N->getOperand(0).getValueType();
17703
17704   // If the operands are legal vectors, leave them alone.
17705   if (TLI.isTypeLegal(OpVT))
17706     return SDValue();
17707
17708   SDLoc DL(N);
17709   EVT VT = N->getValueType(0);
17710   SmallVector<SDValue, 8> Ops;
17711
17712   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17713   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17714
17715   // Keep track of what we encounter.
17716   bool AnyInteger = false;
17717   bool AnyFP = false;
17718   for (const SDValue &Op : N->ops()) {
17719     if (ISD::BITCAST == Op.getOpcode() &&
17720         !Op.getOperand(0).getValueType().isVector())
17721       Ops.push_back(Op.getOperand(0));
17722     else if (ISD::UNDEF == Op.getOpcode())
17723       Ops.push_back(ScalarUndef);
17724     else
17725       return SDValue();
17726
17727     // Note whether we encounter an integer or floating point scalar.
17728     // If it's neither, bail out, it could be something weird like x86mmx.
17729     EVT LastOpVT = Ops.back().getValueType();
17730     if (LastOpVT.isFloatingPoint())
17731       AnyFP = true;
17732     else if (LastOpVT.isInteger())
17733       AnyInteger = true;
17734     else
17735       return SDValue();
17736   }
17737
17738   // If any of the operands is a floating point scalar bitcast to a vector,
17739   // use floating point types throughout, and bitcast everything.
17740   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17741   if (AnyFP) {
17742     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17743     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17744     if (AnyInteger) {
17745       for (SDValue &Op : Ops) {
17746         if (Op.getValueType() == SVT)
17747           continue;
17748         if (Op.isUndef())
17749           Op = ScalarUndef;
17750         else
17751           Op = DAG.getBitcast(SVT, Op);
17752       }
17753     }
17754   }
17755
17756   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17757                                VT.getSizeInBits() / SVT.getSizeInBits());
17758   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17759 }
17760
17761 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17762 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17763 // most two distinct vectors the same size as the result, attempt to turn this
17764 // into a legal shuffle.
17765 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17766   EVT VT = N->getValueType(0);
17767   EVT OpVT = N->getOperand(0).getValueType();
17768   int NumElts = VT.getVectorNumElements();
17769   int NumOpElts = OpVT.getVectorNumElements();
17770
17771   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17772   SmallVector<int, 8> Mask;
17773
17774   for (SDValue Op : N->ops()) {
17775     Op = peekThroughBitcasts(Op);
17776
17777     // UNDEF nodes convert to UNDEF shuffle mask values.
17778     if (Op.isUndef()) {
17779       Mask.append((unsigned)NumOpElts, -1);
17780       continue;
17781     }
17782
17783     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17784       return SDValue();
17785
17786     // What vector are we extracting the subvector from and at what index?
17787     SDValue ExtVec = Op.getOperand(0);
17788
17789     // We want the EVT of the original extraction to correctly scale the
17790     // extraction index.
17791     EVT ExtVT = ExtVec.getValueType();
17792     ExtVec = peekThroughBitcasts(ExtVec);
17793
17794     // UNDEF nodes convert to UNDEF shuffle mask values.
17795     if (ExtVec.isUndef()) {
17796       Mask.append((unsigned)NumOpElts, -1);
17797       continue;
17798     }
17799
17800     if (!isa<ConstantSDNode>(Op.getOperand(1)))
17801       return SDValue();
17802     int ExtIdx = Op.getConstantOperandVal(1);
17803
17804     // Ensure that we are extracting a subvector from a vector the same
17805     // size as the result.
17806     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17807       return SDValue();
17808
17809     // Scale the subvector index to account for any bitcast.
17810     int NumExtElts = ExtVT.getVectorNumElements();
17811     if (0 == (NumExtElts % NumElts))
17812       ExtIdx /= (NumExtElts / NumElts);
17813     else if (0 == (NumElts % NumExtElts))
17814       ExtIdx *= (NumElts / NumExtElts);
17815     else
17816       return SDValue();
17817
17818     // At most we can reference 2 inputs in the final shuffle.
17819     if (SV0.isUndef() || SV0 == ExtVec) {
17820       SV0 = ExtVec;
17821       for (int i = 0; i != NumOpElts; ++i)
17822         Mask.push_back(i + ExtIdx);
17823     } else if (SV1.isUndef() || SV1 == ExtVec) {
17824       SV1 = ExtVec;
17825       for (int i = 0; i != NumOpElts; ++i)
17826         Mask.push_back(i + ExtIdx + NumElts);
17827     } else {
17828       return SDValue();
17829     }
17830   }
17831
17832   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
17833     return SDValue();
17834
17835   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17836                               DAG.getBitcast(VT, SV1), Mask);
17837 }
17838
17839 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17840   // If we only have one input vector, we don't need to do any concatenation.
17841   if (N->getNumOperands() == 1)
17842     return N->getOperand(0);
17843
17844   // Check if all of the operands are undefs.
17845   EVT VT = N->getValueType(0);
17846   if (ISD::allOperandsUndef(N))
17847     return DAG.getUNDEF(VT);
17848
17849   // Optimize concat_vectors where all but the first of the vectors are undef.
17850   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17851         return Op.isUndef();
17852       })) {
17853     SDValue In = N->getOperand(0);
17854     assert(In.getValueType().isVector() && "Must concat vectors");
17855
17856     SDValue Scalar = peekThroughOneUseBitcasts(In);
17857
17858     // concat_vectors(scalar_to_vector(scalar), undef) ->
17859     //     scalar_to_vector(scalar)
17860     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17861          Scalar.hasOneUse()) {
17862       EVT SVT = Scalar.getValueType().getVectorElementType();
17863       if (SVT == Scalar.getOperand(0).getValueType())
17864         Scalar = Scalar.getOperand(0);
17865     }
17866
17867     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17868     if (!Scalar.getValueType().isVector()) {
17869       // If the bitcast type isn't legal, it might be a trunc of a legal type;
17870       // look through the trunc so we can still do the transform:
17871       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17872       if (Scalar->getOpcode() == ISD::TRUNCATE &&
17873           !TLI.isTypeLegal(Scalar.getValueType()) &&
17874           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17875         Scalar = Scalar->getOperand(0);
17876
17877       EVT SclTy = Scalar.getValueType();
17878
17879       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17880         return SDValue();
17881
17882       // Bail out if the vector size is not a multiple of the scalar size.
17883       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17884         return SDValue();
17885
17886       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17887       if (VNTNumElms < 2)
17888         return SDValue();
17889
17890       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17891       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17892         return SDValue();
17893
17894       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17895       return DAG.getBitcast(VT, Res);
17896     }
17897   }
17898
17899   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17900   // We have already tested above for an UNDEF only concatenation.
17901   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17902   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17903   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17904     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17905   };
17906   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17907     SmallVector<SDValue, 8> Opnds;
17908     EVT SVT = VT.getScalarType();
17909
17910     EVT MinVT = SVT;
17911     if (!SVT.isFloatingPoint()) {
17912       // If BUILD_VECTOR are from built from integer, they may have different
17913       // operand types. Get the smallest type and truncate all operands to it.
17914       bool FoundMinVT = false;
17915       for (const SDValue &Op : N->ops())
17916         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17917           EVT OpSVT = Op.getOperand(0).getValueType();
17918           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17919           FoundMinVT = true;
17920         }
17921       assert(FoundMinVT && "Concat vector type mismatch");
17922     }
17923
17924     for (const SDValue &Op : N->ops()) {
17925       EVT OpVT = Op.getValueType();
17926       unsigned NumElts = OpVT.getVectorNumElements();
17927
17928       if (ISD::UNDEF == Op.getOpcode())
17929         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17930
17931       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17932         if (SVT.isFloatingPoint()) {
17933           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17934           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17935         } else {
17936           for (unsigned i = 0; i != NumElts; ++i)
17937             Opnds.push_back(
17938                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17939         }
17940       }
17941     }
17942
17943     assert(VT.getVectorNumElements() == Opnds.size() &&
17944            "Concat vector type mismatch");
17945     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17946   }
17947
17948   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17949   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17950     return V;
17951
17952   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17953   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17954     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17955       return V;
17956
17957   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17958   // nodes often generate nop CONCAT_VECTOR nodes.
17959   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
17960   // place the incoming vectors at the exact same location.
17961   SDValue SingleSource = SDValue();
17962   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17963
17964   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17965     SDValue Op = N->getOperand(i);
17966
17967     if (Op.isUndef())
17968       continue;
17969
17970     // Check if this is the identity extract:
17971     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17972       return SDValue();
17973
17974     // Find the single incoming vector for the extract_subvector.
17975     if (SingleSource.getNode()) {
17976       if (Op.getOperand(0) != SingleSource)
17977         return SDValue();
17978     } else {
17979       SingleSource = Op.getOperand(0);
17980
17981       // Check the source type is the same as the type of the result.
17982       // If not, this concat may extend the vector, so we can not
17983       // optimize it away.
17984       if (SingleSource.getValueType() != N->getValueType(0))
17985         return SDValue();
17986     }
17987
17988     auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17989     // The extract index must be constant.
17990     if (!CS)
17991       return SDValue();
17992
17993     // Check that we are reading from the identity index.
17994     unsigned IdentityIndex = i * PartNumElem;
17995     if (CS->getAPIntValue() != IdentityIndex)
17996       return SDValue();
17997   }
17998
17999   if (SingleSource.getNode())
18000     return SingleSource;
18001
18002   return SDValue();
18003 }
18004
18005 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
18006                                               SelectionDAG &DAG) {
18007   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18008   SDValue BinOp = Extract->getOperand(0);
18009   unsigned BinOpcode = BinOp.getOpcode();
18010   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
18011     return SDValue();
18012
18013   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
18014   SDValue Index = Extract->getOperand(1);
18015   EVT VT = Extract->getValueType(0);
18016
18017   // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
18018   // if the source subvector is the same type as the one being extracted.
18019   auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
18020     if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
18021         V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
18022       return V.getOperand(1);
18023     }
18024     auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18025     if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
18026         V.getOperand(0).getValueType() == VT &&
18027         (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
18028       uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
18029       return V.getOperand(SubIdx);
18030     }
18031     return SDValue();
18032   };
18033   SDValue Sub0 = GetSubVector(Bop0);
18034   SDValue Sub1 = GetSubVector(Bop1);
18035
18036   // TODO: We could handle the case where only 1 operand is being inserted by
18037   //       creating an extract of the other operand, but that requires checking
18038   //       number of uses and/or costs.
18039   if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
18040     return SDValue();
18041
18042   // We are inserting both operands of the wide binop only to extract back
18043   // to the narrow vector size. Eliminate all of the insert/extract:
18044   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
18045   return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
18046                      BinOp->getFlags());
18047 }
18048
18049 /// If we are extracting a subvector produced by a wide binary operator try
18050 /// to use a narrow binary operator and/or avoid concatenation and extraction.
18051 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
18052   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
18053   // some of these bailouts with other transforms.
18054
18055   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
18056     return V;
18057
18058   // The extract index must be a constant, so we can map it to a concat operand.
18059   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18060   if (!ExtractIndexC)
18061     return SDValue();
18062
18063   // We are looking for an optionally bitcasted wide vector binary operator
18064   // feeding an extract subvector.
18065   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18066   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
18067   unsigned BOpcode = BinOp.getOpcode();
18068   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
18069     return SDValue();
18070
18071   // The binop must be a vector type, so we can extract some fraction of it.
18072   EVT WideBVT = BinOp.getValueType();
18073   if (!WideBVT.isVector())
18074     return SDValue();
18075
18076   EVT VT = Extract->getValueType(0);
18077   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
18078   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
18079          "Extract index is not a multiple of the vector length.");
18080
18081   // Bail out if this is not a proper multiple width extraction.
18082   unsigned WideWidth = WideBVT.getSizeInBits();
18083   unsigned NarrowWidth = VT.getSizeInBits();
18084   if (WideWidth % NarrowWidth != 0)
18085     return SDValue();
18086
18087   // Bail out if we are extracting a fraction of a single operation. This can
18088   // occur because we potentially looked through a bitcast of the binop.
18089   unsigned NarrowingRatio = WideWidth / NarrowWidth;
18090   unsigned WideNumElts = WideBVT.getVectorNumElements();
18091   if (WideNumElts % NarrowingRatio != 0)
18092     return SDValue();
18093
18094   // Bail out if the target does not support a narrower version of the binop.
18095   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
18096                                    WideNumElts / NarrowingRatio);
18097   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
18098     return SDValue();
18099
18100   // If extraction is cheap, we don't need to look at the binop operands
18101   // for concat ops. The narrow binop alone makes this transform profitable.
18102   // We can't just reuse the original extract index operand because we may have
18103   // bitcasted.
18104   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
18105   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
18106   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
18107   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
18108       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
18109     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
18110     SDLoc DL(Extract);
18111     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18112     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18113                             BinOp.getOperand(0), NewExtIndex);
18114     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18115                             BinOp.getOperand(1), NewExtIndex);
18116     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
18117                                       BinOp.getNode()->getFlags());
18118     return DAG.getBitcast(VT, NarrowBinOp);
18119   }
18120
18121   // Only handle the case where we are doubling and then halving. A larger ratio
18122   // may require more than two narrow binops to replace the wide binop.
18123   if (NarrowingRatio != 2)
18124     return SDValue();
18125
18126   // TODO: The motivating case for this transform is an x86 AVX1 target. That
18127   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
18128   // flavors, but no other 256-bit integer support. This could be extended to
18129   // handle any binop, but that may require fixing/adding other folds to avoid
18130   // codegen regressions.
18131   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
18132     return SDValue();
18133
18134   // We need at least one concatenation operation of a binop operand to make
18135   // this transform worthwhile. The concat must double the input vector sizes.
18136   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
18137     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
18138       return V.getOperand(ConcatOpNum);
18139     return SDValue();
18140   };
18141   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
18142   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
18143
18144   if (SubVecL || SubVecR) {
18145     // If a binop operand was not the result of a concat, we must extract a
18146     // half-sized operand for our new narrow binop:
18147     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
18148     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
18149     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
18150     SDLoc DL(Extract);
18151     SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18152     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
18153                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18154                                       BinOp.getOperand(0), IndexC);
18155
18156     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
18157                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18158                                       BinOp.getOperand(1), IndexC);
18159
18160     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
18161     return DAG.getBitcast(VT, NarrowBinOp);
18162   }
18163
18164   return SDValue();
18165 }
18166
18167 /// If we are extracting a subvector from a wide vector load, convert to a
18168 /// narrow load to eliminate the extraction:
18169 /// (extract_subvector (load wide vector)) --> (load narrow vector)
18170 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18171   // TODO: Add support for big-endian. The offset calculation must be adjusted.
18172   if (DAG.getDataLayout().isBigEndian())
18173     return SDValue();
18174
18175   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18176   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18177   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
18178     return SDValue();
18179
18180   // Allow targets to opt-out.
18181   EVT VT = Extract->getValueType(0);
18182   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18183   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18184     return SDValue();
18185
18186   // The narrow load will be offset from the base address of the old load if
18187   // we are extracting from something besides index 0 (little-endian).
18188   SDLoc DL(Extract);
18189   SDValue BaseAddr = Ld->getOperand(1);
18190   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18191
18192   // TODO: Use "BaseIndexOffset" to make this more effective.
18193   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18194   MachineFunction &MF = DAG.getMachineFunction();
18195   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18196                                                    VT.getStoreSize());
18197   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18198   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18199   return NewLd;
18200 }
18201
18202 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18203   EVT NVT = N->getValueType(0);
18204   SDValue V = N->getOperand(0);
18205
18206   // Extract from UNDEF is UNDEF.
18207   if (V.isUndef())
18208     return DAG.getUNDEF(NVT);
18209
18210   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
18211     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18212       return NarrowLoad;
18213
18214   // Combine an extract of an extract into a single extract_subvector.
18215   // ext (ext X, C), 0 --> ext X, C
18216   SDValue Index = N->getOperand(1);
18217   if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18218       V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18219     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
18220                                     V.getConstantOperandVal(1)) &&
18221         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
18222       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18223                          V.getOperand(1));
18224     }
18225   }
18226
18227   // Try to move vector bitcast after extract_subv by scaling extraction index:
18228   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18229   if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18230       V.getOperand(0).getValueType().isVector()) {
18231     SDValue SrcOp = V.getOperand(0);
18232     EVT SrcVT = SrcOp.getValueType();
18233     unsigned SrcNumElts = SrcVT.getVectorNumElements();
18234     unsigned DestNumElts = V.getValueType().getVectorNumElements();
18235     if ((SrcNumElts % DestNumElts) == 0) {
18236       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18237       unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18238       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18239                                       NewExtNumElts);
18240       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18241         unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18242         SDLoc DL(N);
18243         SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
18244         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18245                                          V.getOperand(0), NewIndex);
18246         return DAG.getBitcast(NVT, NewExtract);
18247       }
18248     }
18249     // TODO - handle (DestNumElts % SrcNumElts) == 0
18250   }
18251
18252   // Combine:
18253   //    (extract_subvec (concat V1, V2, ...), i)
18254   // Into:
18255   //    Vi if possible
18256   // Only operand 0 is checked as 'concat' assumes all inputs of the same
18257   // type.
18258   if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
18259       V.getOperand(0).getValueType() == NVT) {
18260     unsigned Idx = N->getConstantOperandVal(1);
18261     unsigned NumElems = NVT.getVectorNumElements();
18262     assert((Idx % NumElems) == 0 &&
18263            "IDX in concat is not a multiple of the result vector length.");
18264     return V->getOperand(Idx / NumElems);
18265   }
18266
18267   V = peekThroughBitcasts(V);
18268
18269   // If the input is a build vector. Try to make a smaller build vector.
18270   if (V.getOpcode() == ISD::BUILD_VECTOR) {
18271     if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18272       EVT InVT = V.getValueType();
18273       unsigned ExtractSize = NVT.getSizeInBits();
18274       unsigned EltSize = InVT.getScalarSizeInBits();
18275       // Only do this if we won't split any elements.
18276       if (ExtractSize % EltSize == 0) {
18277         unsigned NumElems = ExtractSize / EltSize;
18278         EVT EltVT = InVT.getVectorElementType();
18279         EVT ExtractVT = NumElems == 1 ? EltVT
18280                                       : EVT::getVectorVT(*DAG.getContext(),
18281                                                          EltVT, NumElems);
18282         if ((Level < AfterLegalizeDAG ||
18283              (NumElems == 1 ||
18284               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18285             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18286           unsigned IdxVal = IdxC->getZExtValue();
18287           IdxVal *= NVT.getScalarSizeInBits();
18288           IdxVal /= EltSize;
18289
18290           if (NumElems == 1) {
18291             SDValue Src = V->getOperand(IdxVal);
18292             if (EltVT != Src.getValueType())
18293               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18294             return DAG.getBitcast(NVT, Src);
18295           }
18296
18297           // Extract the pieces from the original build_vector.
18298           SDValue BuildVec = DAG.getBuildVector(
18299               ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18300           return DAG.getBitcast(NVT, BuildVec);
18301         }
18302       }
18303     }
18304   }
18305
18306   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18307     // Handle only simple case where vector being inserted and vector
18308     // being extracted are of same size.
18309     EVT SmallVT = V.getOperand(1).getValueType();
18310     if (!NVT.bitsEq(SmallVT))
18311       return SDValue();
18312
18313     // Only handle cases where both indexes are constants.
18314     auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18315     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18316     if (InsIdx && ExtIdx) {
18317       // Combine:
18318       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18319       // Into:
18320       //    indices are equal or bit offsets are equal => V1
18321       //    otherwise => (extract_subvec V1, ExtIdx)
18322       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18323           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18324         return DAG.getBitcast(NVT, V.getOperand(1));
18325       return DAG.getNode(
18326           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
18327           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18328           Index);
18329     }
18330   }
18331
18332   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18333     return NarrowBOp;
18334
18335   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18336     return SDValue(N, 0);
18337
18338   return SDValue();
18339 }
18340
18341 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18342 /// followed by concatenation. Narrow vector ops may have better performance
18343 /// than wide ops, and this can unlock further narrowing of other vector ops.
18344 /// Targets can invert this transform later if it is not profitable.
18345 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
18346                                          SelectionDAG &DAG) {
18347   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18348   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18349       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18350       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18351     return SDValue();
18352
18353   // Split the wide shuffle mask into halves. Any mask element that is accessing
18354   // operand 1 is offset down to account for narrowing of the vectors.
18355   ArrayRef<int> Mask = Shuf->getMask();
18356   EVT VT = Shuf->getValueType(0);
18357   unsigned NumElts = VT.getVectorNumElements();
18358   unsigned HalfNumElts = NumElts / 2;
18359   SmallVector<int, 16> Mask0(HalfNumElts, -1);
18360   SmallVector<int, 16> Mask1(HalfNumElts, -1);
18361   for (unsigned i = 0; i != NumElts; ++i) {
18362     if (Mask[i] == -1)
18363       continue;
18364     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18365     if (i < HalfNumElts)
18366       Mask0[i] = M;
18367     else
18368       Mask1[i - HalfNumElts] = M;
18369   }
18370
18371   // Ask the target if this is a valid transform.
18372   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18373   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18374                                 HalfNumElts);
18375   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18376       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18377     return SDValue();
18378
18379   // shuffle (concat X, undef), (concat Y, undef), Mask -->
18380   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18381   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18382   SDLoc DL(Shuf);
18383   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18384   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18385   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18386 }
18387
18388 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18389 // or turn a shuffle of a single concat into simpler shuffle then concat.
18390 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
18391   EVT VT = N->getValueType(0);
18392   unsigned NumElts = VT.getVectorNumElements();
18393
18394   SDValue N0 = N->getOperand(0);
18395   SDValue N1 = N->getOperand(1);
18396   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18397   ArrayRef<int> Mask = SVN->getMask();
18398
18399   SmallVector<SDValue, 4> Ops;
18400   EVT ConcatVT = N0.getOperand(0).getValueType();
18401   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
18402   unsigned NumConcats = NumElts / NumElemsPerConcat;
18403
18404   auto IsUndefMaskElt = [](int i) { return i == -1; };
18405
18406   // Special case: shuffle(concat(A,B)) can be more efficiently represented
18407   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
18408   // half vector elements.
18409   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
18410       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
18411                    IsUndefMaskElt)) {
18412     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
18413                               N0.getOperand(1),
18414                               Mask.slice(0, NumElemsPerConcat));
18415     N1 = DAG.getUNDEF(ConcatVT);
18416     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
18417   }
18418
18419   // Look at every vector that's inserted. We're looking for exact
18420   // subvector-sized copies from a concatenated vector
18421   for (unsigned I = 0; I != NumConcats; ++I) {
18422     unsigned Begin = I * NumElemsPerConcat;
18423     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
18424
18425     // Make sure we're dealing with a copy.
18426     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
18427       Ops.push_back(DAG.getUNDEF(ConcatVT));
18428       continue;
18429     }
18430
18431     int OpIdx = -1;
18432     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
18433       if (IsUndefMaskElt(SubMask[i]))
18434         continue;
18435       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
18436         return SDValue();
18437       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
18438       if (0 <= OpIdx && EltOpIdx != OpIdx)
18439         return SDValue();
18440       OpIdx = EltOpIdx;
18441     }
18442     assert(0 <= OpIdx && "Unknown concat_vectors op");
18443
18444     if (OpIdx < (int)N0.getNumOperands())
18445       Ops.push_back(N0.getOperand(OpIdx));
18446     else
18447       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
18448   }
18449
18450   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18451 }
18452
18453 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18454 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18455 //
18456 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
18457 // a simplification in some sense, but it isn't appropriate in general: some
18458 // BUILD_VECTORs are substantially cheaper than others. The general case
18459 // of a BUILD_VECTOR requires inserting each element individually (or
18460 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
18461 // all constants is a single constant pool load.  A BUILD_VECTOR where each
18462 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
18463 // are undef lowers to a small number of element insertions.
18464 //
18465 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
18466 // We don't fold shuffles where one side is a non-zero constant, and we don't
18467 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
18468 // non-constant operands. This seems to work out reasonably well in practice.
18469 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
18470                                        SelectionDAG &DAG,
18471                                        const TargetLowering &TLI) {
18472   EVT VT = SVN->getValueType(0);
18473   unsigned NumElts = VT.getVectorNumElements();
18474   SDValue N0 = SVN->getOperand(0);
18475   SDValue N1 = SVN->getOperand(1);
18476
18477   if (!N0->hasOneUse())
18478     return SDValue();
18479
18480   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
18481   // discussed above.
18482   if (!N1.isUndef()) {
18483     if (!N1->hasOneUse())
18484       return SDValue();
18485
18486     bool N0AnyConst = isAnyConstantBuildVector(N0);
18487     bool N1AnyConst = isAnyConstantBuildVector(N1);
18488     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
18489       return SDValue();
18490     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
18491       return SDValue();
18492   }
18493
18494   // If both inputs are splats of the same value then we can safely merge this
18495   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
18496   bool IsSplat = false;
18497   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
18498   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
18499   if (BV0 && BV1)
18500     if (SDValue Splat0 = BV0->getSplatValue())
18501       IsSplat = (Splat0 == BV1->getSplatValue());
18502
18503   SmallVector<SDValue, 8> Ops;
18504   SmallSet<SDValue, 16> DuplicateOps;
18505   for (int M : SVN->getMask()) {
18506     SDValue Op = DAG.getUNDEF(VT.getScalarType());
18507     if (M >= 0) {
18508       int Idx = M < (int)NumElts ? M : M - NumElts;
18509       SDValue &S = (M < (int)NumElts ? N0 : N1);
18510       if (S.getOpcode() == ISD::BUILD_VECTOR) {
18511         Op = S.getOperand(Idx);
18512       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18513         SDValue Op0 = S.getOperand(0);
18514         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
18515       } else {
18516         // Operand can't be combined - bail out.
18517         return SDValue();
18518       }
18519     }
18520
18521     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
18522     // generating a splat; semantically, this is fine, but it's likely to
18523     // generate low-quality code if the target can't reconstruct an appropriate
18524     // shuffle.
18525     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
18526       if (!IsSplat && !DuplicateOps.insert(Op).second)
18527         return SDValue();
18528
18529     Ops.push_back(Op);
18530   }
18531
18532   // BUILD_VECTOR requires all inputs to be of the same type, find the
18533   // maximum type and extend them all.
18534   EVT SVT = VT.getScalarType();
18535   if (SVT.isInteger())
18536     for (SDValue &Op : Ops)
18537       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
18538   if (SVT != VT.getScalarType())
18539     for (SDValue &Op : Ops)
18540       Op = TLI.isZExtFree(Op.getValueType(), SVT)
18541                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
18542                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
18543   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
18544 }
18545
18546 // Match shuffles that can be converted to any_vector_extend_in_reg.
18547 // This is often generated during legalization.
18548 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18549 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18550 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
18551                                             SelectionDAG &DAG,
18552                                             const TargetLowering &TLI,
18553                                             bool LegalOperations) {
18554   EVT VT = SVN->getValueType(0);
18555   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18556
18557   // TODO Add support for big-endian when we have a test case.
18558   if (!VT.isInteger() || IsBigEndian)
18559     return SDValue();
18560
18561   unsigned NumElts = VT.getVectorNumElements();
18562   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18563   ArrayRef<int> Mask = SVN->getMask();
18564   SDValue N0 = SVN->getOperand(0);
18565
18566   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
18567   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
18568     for (unsigned i = 0; i != NumElts; ++i) {
18569       if (Mask[i] < 0)
18570         continue;
18571       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
18572         continue;
18573       return false;
18574     }
18575     return true;
18576   };
18577
18578   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
18579   // power-of-2 extensions as they are the most likely.
18580   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
18581     // Check for non power of 2 vector sizes
18582     if (NumElts % Scale != 0)
18583       continue;
18584     if (!isAnyExtend(Scale))
18585       continue;
18586
18587     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
18588     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
18589     // Never create an illegal type. Only create unsupported operations if we
18590     // are pre-legalization.
18591     if (TLI.isTypeLegal(OutVT))
18592       if (!LegalOperations ||
18593           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
18594         return DAG.getBitcast(VT,
18595                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
18596                                           SDLoc(SVN), OutVT, N0));
18597   }
18598
18599   return SDValue();
18600 }
18601
18602 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18603 // each source element of a large type into the lowest elements of a smaller
18604 // destination type. This is often generated during legalization.
18605 // If the source node itself was a '*_extend_vector_inreg' node then we should
18606 // then be able to remove it.
18607 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
18608                                         SelectionDAG &DAG) {
18609   EVT VT = SVN->getValueType(0);
18610   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18611
18612   // TODO Add support for big-endian when we have a test case.
18613   if (!VT.isInteger() || IsBigEndian)
18614     return SDValue();
18615
18616   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
18617
18618   unsigned Opcode = N0.getOpcode();
18619   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
18620       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
18621       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
18622     return SDValue();
18623
18624   SDValue N00 = N0.getOperand(0);
18625   ArrayRef<int> Mask = SVN->getMask();
18626   unsigned NumElts = VT.getVectorNumElements();
18627   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18628   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
18629   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
18630
18631   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
18632     return SDValue();
18633   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
18634
18635   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
18636   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
18637   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
18638   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
18639     for (unsigned i = 0; i != NumElts; ++i) {
18640       if (Mask[i] < 0)
18641         continue;
18642       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
18643         continue;
18644       return false;
18645     }
18646     return true;
18647   };
18648
18649   // At the moment we just handle the case where we've truncated back to the
18650   // same size as before the extension.
18651   // TODO: handle more extension/truncation cases as cases arise.
18652   if (EltSizeInBits != ExtSrcSizeInBits)
18653     return SDValue();
18654
18655   // We can remove *extend_vector_inreg only if the truncation happens at
18656   // the same scale as the extension.
18657   if (isTruncate(ExtScale))
18658     return DAG.getBitcast(VT, N00);
18659
18660   return SDValue();
18661 }
18662
18663 // Combine shuffles of splat-shuffles of the form:
18664 // shuffle (shuffle V, undef, splat-mask), undef, M
18665 // If splat-mask contains undef elements, we need to be careful about
18666 // introducing undef's in the folded mask which are not the result of composing
18667 // the masks of the shuffles.
18668 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18669                                         SelectionDAG &DAG) {
18670   if (!Shuf->getOperand(1).isUndef())
18671     return SDValue();
18672   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18673   if (!Splat || !Splat->isSplat())
18674     return SDValue();
18675
18676   ArrayRef<int> ShufMask = Shuf->getMask();
18677   ArrayRef<int> SplatMask = Splat->getMask();
18678   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18679
18680   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18681   // every undef mask element in the splat-shuffle has a corresponding undef
18682   // element in the user-shuffle's mask or if the composition of mask elements
18683   // would result in undef.
18684   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18685   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18686   //   In this case it is not legal to simplify to the splat-shuffle because we
18687   //   may be exposing the users of the shuffle an undef element at index 1
18688   //   which was not there before the combine.
18689   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18690   //   In this case the composition of masks yields SplatMask, so it's ok to
18691   //   simplify to the splat-shuffle.
18692   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18693   //   In this case the composed mask includes all undef elements of SplatMask
18694   //   and in addition sets element zero to undef. It is safe to simplify to
18695   //   the splat-shuffle.
18696   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18697                                        ArrayRef<int> SplatMask) {
18698     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18699       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18700           SplatMask[UserMask[i]] != -1)
18701         return false;
18702     return true;
18703   };
18704   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18705     return Shuf->getOperand(0);
18706
18707   // Create a new shuffle with a mask that is composed of the two shuffles'
18708   // masks.
18709   SmallVector<int, 32> NewMask;
18710   for (int Idx : ShufMask)
18711     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
18712
18713   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18714                               Splat->getOperand(0), Splat->getOperand(1),
18715                               NewMask);
18716 }
18717
18718 /// If the shuffle mask is taking exactly one element from the first vector
18719 /// operand and passing through all other elements from the second vector
18720 /// operand, return the index of the mask element that is choosing an element
18721 /// from the first operand. Otherwise, return -1.
18722 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18723   int MaskSize = Mask.size();
18724   int EltFromOp0 = -1;
18725   // TODO: This does not match if there are undef elements in the shuffle mask.
18726   // Should we ignore undefs in the shuffle mask instead? The trade-off is
18727   // removing an instruction (a shuffle), but losing the knowledge that some
18728   // vector lanes are not needed.
18729   for (int i = 0; i != MaskSize; ++i) {
18730     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18731       // We're looking for a shuffle of exactly one element from operand 0.
18732       if (EltFromOp0 != -1)
18733         return -1;
18734       EltFromOp0 = i;
18735     } else if (Mask[i] != i + MaskSize) {
18736       // Nothing from operand 1 can change lanes.
18737       return -1;
18738     }
18739   }
18740   return EltFromOp0;
18741 }
18742
18743 /// If a shuffle inserts exactly one element from a source vector operand into
18744 /// another vector operand and we can access the specified element as a scalar,
18745 /// then we can eliminate the shuffle.
18746 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18747                                       SelectionDAG &DAG) {
18748   // First, check if we are taking one element of a vector and shuffling that
18749   // element into another vector.
18750   ArrayRef<int> Mask = Shuf->getMask();
18751   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18752   SDValue Op0 = Shuf->getOperand(0);
18753   SDValue Op1 = Shuf->getOperand(1);
18754   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18755   if (ShufOp0Index == -1) {
18756     // Commute mask and check again.
18757     ShuffleVectorSDNode::commuteMask(CommutedMask);
18758     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18759     if (ShufOp0Index == -1)
18760       return SDValue();
18761     // Commute operands to match the commuted shuffle mask.
18762     std::swap(Op0, Op1);
18763     Mask = CommutedMask;
18764   }
18765
18766   // The shuffle inserts exactly one element from operand 0 into operand 1.
18767   // Now see if we can access that element as a scalar via a real insert element
18768   // instruction.
18769   // TODO: We can try harder to locate the element as a scalar. Examples: it
18770   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18771   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18772          "Shuffle mask value must be from operand 0");
18773   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18774     return SDValue();
18775
18776   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18777   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18778     return SDValue();
18779
18780   // There's an existing insertelement with constant insertion index, so we
18781   // don't need to check the legality/profitability of a replacement operation
18782   // that differs at most in the constant value. The target should be able to
18783   // lower any of those in a similar way. If not, legalization will expand this
18784   // to a scalar-to-vector plus shuffle.
18785   //
18786   // Note that the shuffle may move the scalar from the position that the insert
18787   // element used. Therefore, our new insert element occurs at the shuffle's
18788   // mask index value, not the insert's index value.
18789   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
18790   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18791                                         Op0.getOperand(2).getValueType());
18792   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18793                      Op1, Op0.getOperand(1), NewInsIndex);
18794 }
18795
18796 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18797 /// completely. This has the potential to lose undef knowledge because the first
18798 /// shuffle may not have an undef mask element where the second one does. So
18799 /// only call this after doing simplifications based on demanded elements.
18800 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18801   // shuf (shuf0 X, Y, Mask0), undef, Mask
18802   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18803   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18804     return SDValue();
18805
18806   ArrayRef<int> Mask = Shuf->getMask();
18807   ArrayRef<int> Mask0 = Shuf0->getMask();
18808   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18809     // Ignore undef elements.
18810     if (Mask[i] == -1)
18811       continue;
18812     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18813
18814     // Is the element of the shuffle operand chosen by this shuffle the same as
18815     // the element chosen by the shuffle operand itself?
18816     if (Mask0[Mask[i]] != Mask0[i])
18817       return SDValue();
18818   }
18819   // Every element of this shuffle is identical to the result of the previous
18820   // shuffle, so we can replace this value.
18821   return Shuf->getOperand(0);
18822 }
18823
18824 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18825   EVT VT = N->getValueType(0);
18826   unsigned NumElts = VT.getVectorNumElements();
18827
18828   SDValue N0 = N->getOperand(0);
18829   SDValue N1 = N->getOperand(1);
18830
18831   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18832
18833   // Canonicalize shuffle undef, undef -> undef
18834   if (N0.isUndef() && N1.isUndef())
18835     return DAG.getUNDEF(VT);
18836
18837   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18838
18839   // Canonicalize shuffle v, v -> v, undef
18840   if (N0 == N1) {
18841     SmallVector<int, 8> NewMask;
18842     for (unsigned i = 0; i != NumElts; ++i) {
18843       int Idx = SVN->getMaskElt(i);
18844       if (Idx >= (int)NumElts) Idx -= NumElts;
18845       NewMask.push_back(Idx);
18846     }
18847     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18848   }
18849
18850   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
18851   if (N0.isUndef())
18852     return DAG.getCommutedVectorShuffle(*SVN);
18853
18854   // Remove references to rhs if it is undef
18855   if (N1.isUndef()) {
18856     bool Changed = false;
18857     SmallVector<int, 8> NewMask;
18858     for (unsigned i = 0; i != NumElts; ++i) {
18859       int Idx = SVN->getMaskElt(i);
18860       if (Idx >= (int)NumElts) {
18861         Idx = -1;
18862         Changed = true;
18863       }
18864       NewMask.push_back(Idx);
18865     }
18866     if (Changed)
18867       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18868   }
18869
18870   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18871     return InsElt;
18872
18873   // A shuffle of a single vector that is a splatted value can always be folded.
18874   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18875     return V;
18876
18877   // If it is a splat, check if the argument vector is another splat or a
18878   // build_vector.
18879   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18880     int SplatIndex = SVN->getSplatIndex();
18881     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18882         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
18883       // splat (vector_bo L, R), Index -->
18884       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
18885       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18886       SDLoc DL(N);
18887       EVT EltVT = VT.getScalarType();
18888       SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18889       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18890       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18891       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18892                                   N0.getNode()->getFlags());
18893       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18894       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18895       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18896     }
18897
18898     // If this is a bit convert that changes the element type of the vector but
18899     // not the number of vector elements, look through it.  Be careful not to
18900     // look though conversions that change things like v4f32 to v2f64.
18901     SDNode *V = N0.getNode();
18902     if (V->getOpcode() == ISD::BITCAST) {
18903       SDValue ConvInput = V->getOperand(0);
18904       if (ConvInput.getValueType().isVector() &&
18905           ConvInput.getValueType().getVectorNumElements() == NumElts)
18906         V = ConvInput.getNode();
18907     }
18908
18909     if (V->getOpcode() == ISD::BUILD_VECTOR) {
18910       assert(V->getNumOperands() == NumElts &&
18911              "BUILD_VECTOR has wrong number of operands");
18912       SDValue Base;
18913       bool AllSame = true;
18914       for (unsigned i = 0; i != NumElts; ++i) {
18915         if (!V->getOperand(i).isUndef()) {
18916           Base = V->getOperand(i);
18917           break;
18918         }
18919       }
18920       // Splat of <u, u, u, u>, return <u, u, u, u>
18921       if (!Base.getNode())
18922         return N0;
18923       for (unsigned i = 0; i != NumElts; ++i) {
18924         if (V->getOperand(i) != Base) {
18925           AllSame = false;
18926           break;
18927         }
18928       }
18929       // Splat of <x, x, x, x>, return <x, x, x, x>
18930       if (AllSame)
18931         return N0;
18932
18933       // Canonicalize any other splat as a build_vector.
18934       SDValue Splatted = V->getOperand(SplatIndex);
18935       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18936       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18937
18938       // We may have jumped through bitcasts, so the type of the
18939       // BUILD_VECTOR may not match the type of the shuffle.
18940       if (V->getValueType(0) != VT)
18941         NewBV = DAG.getBitcast(VT, NewBV);
18942       return NewBV;
18943     }
18944   }
18945
18946   // Simplify source operands based on shuffle mask.
18947   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18948     return SDValue(N, 0);
18949
18950   // This is intentionally placed after demanded elements simplification because
18951   // it could eliminate knowledge of undef elements created by this shuffle.
18952   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18953     return ShufOp;
18954
18955   // Match shuffles that can be converted to any_vector_extend_in_reg.
18956   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18957     return V;
18958
18959   // Combine "truncate_vector_in_reg" style shuffles.
18960   if (SDValue V = combineTruncationShuffle(SVN, DAG))
18961     return V;
18962
18963   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18964       Level < AfterLegalizeVectorOps &&
18965       (N1.isUndef() ||
18966       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18967        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18968     if (SDValue V = partitionShuffleOfConcats(N, DAG))
18969       return V;
18970   }
18971
18972   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18973   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18974   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18975     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18976       return Res;
18977
18978   // If this shuffle only has a single input that is a bitcasted shuffle,
18979   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18980   // back to their original types.
18981   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18982       N1.isUndef() && Level < AfterLegalizeVectorOps &&
18983       TLI.isTypeLegal(VT)) {
18984     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18985       if (Scale == 1)
18986         return SmallVector<int, 8>(Mask.begin(), Mask.end());
18987
18988       SmallVector<int, 8> NewMask;
18989       for (int M : Mask)
18990         for (int s = 0; s != Scale; ++s)
18991           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18992       return NewMask;
18993     };
18994
18995     SDValue BC0 = peekThroughOneUseBitcasts(N0);
18996     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18997       EVT SVT = VT.getScalarType();
18998       EVT InnerVT = BC0->getValueType(0);
18999       EVT InnerSVT = InnerVT.getScalarType();
19000
19001       // Determine which shuffle works with the smaller scalar type.
19002       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
19003       EVT ScaleSVT = ScaleVT.getScalarType();
19004
19005       if (TLI.isTypeLegal(ScaleVT) &&
19006           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19007           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19008         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19009         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19010
19011         // Scale the shuffle masks to the smaller scalar type.
19012         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19013         SmallVector<int, 8> InnerMask =
19014             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19015         SmallVector<int, 8> OuterMask =
19016             ScaleShuffleMask(SVN->getMask(), OuterScale);
19017
19018         // Merge the shuffle masks.
19019         SmallVector<int, 8> NewMask;
19020         for (int M : OuterMask)
19021           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
19022
19023         // Test for shuffle mask legality over both commutations.
19024         SDValue SV0 = BC0->getOperand(0);
19025         SDValue SV1 = BC0->getOperand(1);
19026         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19027         if (!LegalMask) {
19028           std::swap(SV0, SV1);
19029           ShuffleVectorSDNode::commuteMask(NewMask);
19030           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19031         }
19032
19033         if (LegalMask) {
19034           SV0 = DAG.getBitcast(ScaleVT, SV0);
19035           SV1 = DAG.getBitcast(ScaleVT, SV1);
19036           return DAG.getBitcast(
19037               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19038         }
19039       }
19040     }
19041   }
19042
19043   // Canonicalize shuffles according to rules:
19044   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19045   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19046   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19047   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19048       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
19049       TLI.isTypeLegal(VT)) {
19050     // The incoming shuffle must be of the same type as the result of the
19051     // current shuffle.
19052     assert(N1->getOperand(0).getValueType() == VT &&
19053            "Shuffle types don't match");
19054
19055     SDValue SV0 = N1->getOperand(0);
19056     SDValue SV1 = N1->getOperand(1);
19057     bool HasSameOp0 = N0 == SV0;
19058     bool IsSV1Undef = SV1.isUndef();
19059     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
19060       // Commute the operands of this shuffle so that next rule
19061       // will trigger.
19062       return DAG.getCommutedVectorShuffle(*SVN);
19063   }
19064
19065   // Try to fold according to rules:
19066   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19067   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19068   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19069   // Don't try to fold shuffles with illegal type.
19070   // Only fold if this shuffle is the only user of the other shuffle.
19071   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
19072       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
19073     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19074
19075     // Don't try to fold splats; they're likely to simplify somehow, or they
19076     // might be free.
19077     if (OtherSV->isSplat())
19078       return SDValue();
19079
19080     // The incoming shuffle must be of the same type as the result of the
19081     // current shuffle.
19082     assert(OtherSV->getOperand(0).getValueType() == VT &&
19083            "Shuffle types don't match");
19084
19085     SDValue SV0, SV1;
19086     SmallVector<int, 4> Mask;
19087     // Compute the combined shuffle mask for a shuffle with SV0 as the first
19088     // operand, and SV1 as the second operand.
19089     for (unsigned i = 0; i != NumElts; ++i) {
19090       int Idx = SVN->getMaskElt(i);
19091       if (Idx < 0) {
19092         // Propagate Undef.
19093         Mask.push_back(Idx);
19094         continue;
19095       }
19096
19097       SDValue CurrentVec;
19098       if (Idx < (int)NumElts) {
19099         // This shuffle index refers to the inner shuffle N0. Lookup the inner
19100         // shuffle mask to identify which vector is actually referenced.
19101         Idx = OtherSV->getMaskElt(Idx);
19102         if (Idx < 0) {
19103           // Propagate Undef.
19104           Mask.push_back(Idx);
19105           continue;
19106         }
19107
19108         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
19109                                            : OtherSV->getOperand(1);
19110       } else {
19111         // This shuffle index references an element within N1.
19112         CurrentVec = N1;
19113       }
19114
19115       // Simple case where 'CurrentVec' is UNDEF.
19116       if (CurrentVec.isUndef()) {
19117         Mask.push_back(-1);
19118         continue;
19119       }
19120
19121       // Canonicalize the shuffle index. We don't know yet if CurrentVec
19122       // will be the first or second operand of the combined shuffle.
19123       Idx = Idx % NumElts;
19124       if (!SV0.getNode() || SV0 == CurrentVec) {
19125         // Ok. CurrentVec is the left hand side.
19126         // Update the mask accordingly.
19127         SV0 = CurrentVec;
19128         Mask.push_back(Idx);
19129         continue;
19130       }
19131
19132       // Bail out if we cannot convert the shuffle pair into a single shuffle.
19133       if (SV1.getNode() && SV1 != CurrentVec)
19134         return SDValue();
19135
19136       // Ok. CurrentVec is the right hand side.
19137       // Update the mask accordingly.
19138       SV1 = CurrentVec;
19139       Mask.push_back(Idx + NumElts);
19140     }
19141
19142     // Check if all indices in Mask are Undef. In case, propagate Undef.
19143     bool isUndefMask = true;
19144     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19145       isUndefMask &= Mask[i] < 0;
19146
19147     if (isUndefMask)
19148       return DAG.getUNDEF(VT);
19149
19150     if (!SV0.getNode())
19151       SV0 = DAG.getUNDEF(VT);
19152     if (!SV1.getNode())
19153       SV1 = DAG.getUNDEF(VT);
19154
19155     // Avoid introducing shuffles with illegal mask.
19156     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
19157       ShuffleVectorSDNode::commuteMask(Mask);
19158
19159       if (!TLI.isShuffleMaskLegal(Mask, VT))
19160         return SDValue();
19161
19162       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19163       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19164       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19165       std::swap(SV0, SV1);
19166     }
19167
19168     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19169     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19170     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19171     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
19172   }
19173
19174   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19175     return V;
19176
19177   return SDValue();
19178 }
19179
19180 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
19181   SDValue InVal = N->getOperand(0);
19182   EVT VT = N->getValueType(0);
19183
19184   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
19185   // with a VECTOR_SHUFFLE and possible truncate.
19186   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19187     SDValue InVec = InVal->getOperand(0);
19188     SDValue EltNo = InVal->getOperand(1);
19189     auto InVecT = InVec.getValueType();
19190     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
19191       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
19192       int Elt = C0->getZExtValue();
19193       NewMask[0] = Elt;
19194       SDValue Val;
19195       // If we have an implict truncate do truncate here as long as it's legal.
19196       // if it's not legal, this should
19197       if (VT.getScalarType() != InVal.getValueType() &&
19198           InVal.getValueType().isScalarInteger() &&
19199           isTypeLegal(VT.getScalarType())) {
19200         Val =
19201             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
19202         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
19203       }
19204       if (VT.getScalarType() == InVecT.getScalarType() &&
19205           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
19206           TLI.isShuffleMaskLegal(NewMask, VT)) {
19207         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
19208                                    DAG.getUNDEF(InVecT), NewMask);
19209         // If the initial vector is the correct size this shuffle is a
19210         // valid result.
19211         if (VT == InVecT)
19212           return Val;
19213         // If not we must truncate the vector.
19214         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
19215           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
19216           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
19217           EVT SubVT =
19218               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
19219                                VT.getVectorNumElements());
19220           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
19221                             ZeroIdx);
19222           return Val;
19223         }
19224       }
19225     }
19226   }
19227
19228   return SDValue();
19229 }
19230
19231 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
19232   EVT VT = N->getValueType(0);
19233   SDValue N0 = N->getOperand(0);
19234   SDValue N1 = N->getOperand(1);
19235   SDValue N2 = N->getOperand(2);
19236
19237   // If inserting an UNDEF, just return the original vector.
19238   if (N1.isUndef())
19239     return N0;
19240
19241   // If this is an insert of an extracted vector into an undef vector, we can
19242   // just use the input to the extract.
19243   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19244       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
19245     return N1.getOperand(0);
19246
19247   // If we are inserting a bitcast value into an undef, with the same
19248   // number of elements, just use the bitcast input of the extract.
19249   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
19250   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
19251   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
19252       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19253       N1.getOperand(0).getOperand(1) == N2 &&
19254       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
19255           VT.getVectorNumElements() &&
19256       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
19257           VT.getSizeInBits()) {
19258     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
19259   }
19260
19261   // If both N1 and N2 are bitcast values on which insert_subvector
19262   // would makes sense, pull the bitcast through.
19263   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
19264   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
19265   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
19266     SDValue CN0 = N0.getOperand(0);
19267     SDValue CN1 = N1.getOperand(0);
19268     EVT CN0VT = CN0.getValueType();
19269     EVT CN1VT = CN1.getValueType();
19270     if (CN0VT.isVector() && CN1VT.isVector() &&
19271         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
19272         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
19273       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
19274                                       CN0.getValueType(), CN0, CN1, N2);
19275       return DAG.getBitcast(VT, NewINSERT);
19276     }
19277   }
19278
19279   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
19280   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
19281   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
19282   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
19283       N0.getOperand(1).getValueType() == N1.getValueType() &&
19284       N0.getOperand(2) == N2)
19285     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
19286                        N1, N2);
19287
19288   // Eliminate an intermediate insert into an undef vector:
19289   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
19290   // insert_subvector undef, X, N2
19291   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
19292       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
19293     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
19294                        N1.getOperand(1), N2);
19295
19296   if (!isa<ConstantSDNode>(N2))
19297     return SDValue();
19298
19299   uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
19300
19301   // Push subvector bitcasts to the output, adjusting the index as we go.
19302   // insert_subvector(bitcast(v), bitcast(s), c1)
19303   // -> bitcast(insert_subvector(v, s, c2))
19304   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
19305       N1.getOpcode() == ISD::BITCAST) {
19306     SDValue N0Src = peekThroughBitcasts(N0);
19307     SDValue N1Src = peekThroughBitcasts(N1);
19308     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
19309     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
19310     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
19311         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
19312       EVT NewVT;
19313       SDLoc DL(N);
19314       SDValue NewIdx;
19315       MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
19316       LLVMContext &Ctx = *DAG.getContext();
19317       unsigned NumElts = VT.getVectorNumElements();
19318       unsigned EltSizeInBits = VT.getScalarSizeInBits();
19319       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
19320         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
19321         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
19322         NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
19323       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
19324         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
19325         if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
19326           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
19327           NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
19328         }
19329       }
19330       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
19331         SDValue Res = DAG.getBitcast(NewVT, N0Src);
19332         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
19333         return DAG.getBitcast(VT, Res);
19334       }
19335     }
19336   }
19337
19338   // Canonicalize insert_subvector dag nodes.
19339   // Example:
19340   // (insert_subvector (insert_subvector A, Idx0), Idx1)
19341   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
19342   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
19343       N1.getValueType() == N0.getOperand(1).getValueType() &&
19344       isa<ConstantSDNode>(N0.getOperand(2))) {
19345     unsigned OtherIdx = N0.getConstantOperandVal(2);
19346     if (InsIdx < OtherIdx) {
19347       // Swap nodes.
19348       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
19349                                   N0.getOperand(0), N1, N2);
19350       AddToWorklist(NewOp.getNode());
19351       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
19352                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
19353     }
19354   }
19355
19356   // If the input vector is a concatenation, and the insert replaces
19357   // one of the pieces, we can optimize into a single concat_vectors.
19358   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
19359       N0.getOperand(0).getValueType() == N1.getValueType()) {
19360     unsigned Factor = N1.getValueType().getVectorNumElements();
19361
19362     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
19363     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
19364
19365     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19366   }
19367
19368   // Simplify source operands based on insertion.
19369   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19370     return SDValue(N, 0);
19371
19372   return SDValue();
19373 }
19374
19375 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
19376   SDValue N0 = N->getOperand(0);
19377
19378   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
19379   if (N0->getOpcode() == ISD::FP16_TO_FP)
19380     return N0->getOperand(0);
19381
19382   return SDValue();
19383 }
19384
19385 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
19386   SDValue N0 = N->getOperand(0);
19387
19388   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
19389   if (N0->getOpcode() == ISD::AND) {
19390     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
19391     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
19392       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
19393                          N0.getOperand(0));
19394     }
19395   }
19396
19397   return SDValue();
19398 }
19399
19400 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
19401   SDValue N0 = N->getOperand(0);
19402   EVT VT = N0.getValueType();
19403   unsigned Opcode = N->getOpcode();
19404
19405   // VECREDUCE over 1-element vector is just an extract.
19406   if (VT.getVectorNumElements() == 1) {
19407     SDLoc dl(N);
19408     SDValue Res = DAG.getNode(
19409         ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
19410         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
19411     if (Res.getValueType() != N->getValueType(0))
19412       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
19413     return Res;
19414   }
19415
19416   // On an boolean vector an and/or reduction is the same as a umin/umax
19417   // reduction. Convert them if the latter is legal while the former isn't.
19418   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
19419     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
19420         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
19421     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
19422         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
19423         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
19424       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
19425   }
19426
19427   return SDValue();
19428 }
19429
19430 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
19431 /// with the destination vector and a zero vector.
19432 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
19433 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
19434 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
19435   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
19436
19437   EVT VT = N->getValueType(0);
19438   SDValue LHS = N->getOperand(0);
19439   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
19440   SDLoc DL(N);
19441
19442   // Make sure we're not running after operation legalization where it
19443   // may have custom lowered the vector shuffles.
19444   if (LegalOperations)
19445     return SDValue();
19446
19447   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19448     return SDValue();
19449
19450   EVT RVT = RHS.getValueType();
19451   unsigned NumElts = RHS.getNumOperands();
19452
19453   // Attempt to create a valid clear mask, splitting the mask into
19454   // sub elements and checking to see if each is
19455   // all zeros or all ones - suitable for shuffle masking.
19456   auto BuildClearMask = [&](int Split) {
19457     int NumSubElts = NumElts * Split;
19458     int NumSubBits = RVT.getScalarSizeInBits() / Split;
19459
19460     SmallVector<int, 8> Indices;
19461     for (int i = 0; i != NumSubElts; ++i) {
19462       int EltIdx = i / Split;
19463       int SubIdx = i % Split;
19464       SDValue Elt = RHS.getOperand(EltIdx);
19465       if (Elt.isUndef()) {
19466         Indices.push_back(-1);
19467         continue;
19468       }
19469
19470       APInt Bits;
19471       if (isa<ConstantSDNode>(Elt))
19472         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
19473       else if (isa<ConstantFPSDNode>(Elt))
19474         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
19475       else
19476         return SDValue();
19477
19478       // Extract the sub element from the constant bit mask.
19479       if (DAG.getDataLayout().isBigEndian()) {
19480         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
19481       } else {
19482         Bits.lshrInPlace(SubIdx * NumSubBits);
19483       }
19484
19485       if (Split > 1)
19486         Bits = Bits.trunc(NumSubBits);
19487
19488       if (Bits.isAllOnesValue())
19489         Indices.push_back(i);
19490       else if (Bits == 0)
19491         Indices.push_back(i + NumSubElts);
19492       else
19493         return SDValue();
19494     }
19495
19496     // Let's see if the target supports this vector_shuffle.
19497     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
19498     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
19499     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
19500       return SDValue();
19501
19502     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
19503     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
19504                                                    DAG.getBitcast(ClearVT, LHS),
19505                                                    Zero, Indices));
19506   };
19507
19508   // Determine maximum split level (byte level masking).
19509   int MaxSplit = 1;
19510   if (RVT.getScalarSizeInBits() % 8 == 0)
19511     MaxSplit = RVT.getScalarSizeInBits() / 8;
19512
19513   for (int Split = 1; Split <= MaxSplit; ++Split)
19514     if (RVT.getScalarSizeInBits() % Split == 0)
19515       if (SDValue S = BuildClearMask(Split))
19516         return S;
19517
19518   return SDValue();
19519 }
19520
19521 /// If a vector binop is performed on splat values, it may be profitable to
19522 /// extract, scalarize, and insert/splat.
19523 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
19524   SDValue N0 = N->getOperand(0);
19525   SDValue N1 = N->getOperand(1);
19526   unsigned Opcode = N->getOpcode();
19527   EVT VT = N->getValueType(0);
19528   EVT EltVT = VT.getVectorElementType();
19529   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19530
19531   // TODO: Remove/replace the extract cost check? If the elements are available
19532   //       as scalars, then there may be no extract cost. Should we ask if
19533   //       inserting a scalar back into a vector is cheap instead?
19534   int Index0, Index1;
19535   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
19536   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
19537   if (!Src0 || !Src1 || Index0 != Index1 ||
19538       Src0.getValueType().getVectorElementType() != EltVT ||
19539       Src1.getValueType().getVectorElementType() != EltVT ||
19540       !TLI.isExtractVecEltCheap(VT, Index0) ||
19541       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
19542     return SDValue();
19543
19544   SDLoc DL(N);
19545   SDValue IndexC =
19546       DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
19547   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
19548   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
19549   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
19550
19551   // If all lanes but 1 are undefined, no need to splat the scalar result.
19552   // TODO: Keep track of undefs and use that info in the general case.
19553   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
19554       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
19555       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
19556     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
19557     // build_vec ..undef, (bo X, Y), undef...
19558     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
19559     Ops[Index0] = ScalarBO;
19560     return DAG.getBuildVector(VT, DL, Ops);
19561   }
19562
19563   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
19564   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
19565   return DAG.getBuildVector(VT, DL, Ops);
19566 }
19567
19568 /// Visit a binary vector operation, like ADD.
19569 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19570   assert(N->getValueType(0).isVector() &&
19571          "SimplifyVBinOp only works on vectors!");
19572
19573   SDValue LHS = N->getOperand(0);
19574   SDValue RHS = N->getOperand(1);
19575   SDValue Ops[] = {LHS, RHS};
19576   EVT VT = N->getValueType(0);
19577   unsigned Opcode = N->getOpcode();
19578
19579   // See if we can constant fold the vector operation.
19580   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
19581           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19582     return Fold;
19583
19584   // Move unary shuffles with identical masks after a vector binop:
19585   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19586   //   --> shuffle (VBinOp A, B), Undef, Mask
19587   // This does not require type legality checks because we are creating the
19588   // same types of operations that are in the original sequence. We do have to
19589   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
19590   // though. This code is adapted from the identical transform in instcombine.
19591   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
19592       Opcode != ISD::UREM && Opcode != ISD::SREM &&
19593       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
19594     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19595     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
19596     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
19597         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
19598         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
19599       SDLoc DL(N);
19600       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19601                                      RHS.getOperand(0), N->getFlags());
19602       SDValue UndefV = LHS.getOperand(1);
19603       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19604     }
19605   }
19606
19607   // The following pattern is likely to emerge with vector reduction ops. Moving
19608   // the binary operation ahead of insertion may allow using a narrower vector
19609   // instruction that has better performance than the wide version of the op:
19610   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19611   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
19612       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
19613       LHS.getOperand(2) == RHS.getOperand(2) &&
19614       (LHS.hasOneUse() || RHS.hasOneUse())) {
19615     SDValue X = LHS.getOperand(1);
19616     SDValue Y = RHS.getOperand(1);
19617     SDValue Z = LHS.getOperand(2);
19618     EVT NarrowVT = X.getValueType();
19619     if (NarrowVT == Y.getValueType() &&
19620         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19621       // (binop undef, undef) may not return undef, so compute that result.
19622       SDLoc DL(N);
19623       SDValue VecC =
19624           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19625       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19626       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19627     }
19628   }
19629
19630   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19631     return V;
19632
19633   return SDValue();
19634 }
19635
19636 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
19637                                     SDValue N2) {
19638   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
19639
19640   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
19641                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
19642
19643   // If we got a simplified select_cc node back from SimplifySelectCC, then
19644   // break it down into a new SETCC node, and a new SELECT node, and then return
19645   // the SELECT node, since we were called with a SELECT node.
19646   if (SCC.getNode()) {
19647     // Check to see if we got a select_cc back (to turn into setcc/select).
19648     // Otherwise, just return whatever node we got back, like fabs.
19649     if (SCC.getOpcode() == ISD::SELECT_CC) {
19650       const SDNodeFlags Flags = N0.getNode()->getFlags();
19651       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
19652                                   N0.getValueType(),
19653                                   SCC.getOperand(0), SCC.getOperand(1),
19654                                   SCC.getOperand(4), Flags);
19655       AddToWorklist(SETCC.getNode());
19656       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
19657                                          SCC.getOperand(2), SCC.getOperand(3));
19658       SelectNode->setFlags(Flags);
19659       return SelectNode;
19660     }
19661
19662     return SCC;
19663   }
19664   return SDValue();
19665 }
19666
19667 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19668 /// being selected between, see if we can simplify the select.  Callers of this
19669 /// should assume that TheSelect is deleted if this returns true.  As such, they
19670 /// should return the appropriate thing (e.g. the node) back to the top-level of
19671 /// the DAG combiner loop to avoid it being looked at.
19672 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19673                                     SDValue RHS) {
19674   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19675   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19676   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19677     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
19678       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19679       SDValue Sqrt = RHS;
19680       ISD::CondCode CC;
19681       SDValue CmpLHS;
19682       const ConstantFPSDNode *Zero = nullptr;
19683
19684       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19685         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19686         CmpLHS = TheSelect->getOperand(0);
19687         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19688       } else {
19689         // SELECT or VSELECT
19690         SDValue Cmp = TheSelect->getOperand(0);
19691         if (Cmp.getOpcode() == ISD::SETCC) {
19692           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19693           CmpLHS = Cmp.getOperand(0);
19694           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19695         }
19696       }
19697       if (Zero && Zero->isZero() &&
19698           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19699           CC == ISD::SETULT || CC == ISD::SETLT)) {
19700         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19701         CombineTo(TheSelect, Sqrt);
19702         return true;
19703       }
19704     }
19705   }
19706   // Cannot simplify select with vector condition
19707   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19708
19709   // If this is a select from two identical things, try to pull the operation
19710   // through the select.
19711   if (LHS.getOpcode() != RHS.getOpcode() ||
19712       !LHS.hasOneUse() || !RHS.hasOneUse())
19713     return false;
19714
19715   // If this is a load and the token chain is identical, replace the select
19716   // of two loads with a load through a select of the address to load from.
19717   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19718   // constants have been dropped into the constant pool.
19719   if (LHS.getOpcode() == ISD::LOAD) {
19720     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19721     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19722
19723     // Token chains must be identical.
19724     if (LHS.getOperand(0) != RHS.getOperand(0) ||
19725         // Do not let this transformation reduce the number of volatile loads.
19726         LLD->isVolatile() || RLD->isVolatile() ||
19727         // FIXME: If either is a pre/post inc/dec load,
19728         // we'd need to split out the address adjustment.
19729         LLD->isIndexed() || RLD->isIndexed() ||
19730         // If this is an EXTLOAD, the VT's must match.
19731         LLD->getMemoryVT() != RLD->getMemoryVT() ||
19732         // If this is an EXTLOAD, the kind of extension must match.
19733         (LLD->getExtensionType() != RLD->getExtensionType() &&
19734          // The only exception is if one of the extensions is anyext.
19735          LLD->getExtensionType() != ISD::EXTLOAD &&
19736          RLD->getExtensionType() != ISD::EXTLOAD) ||
19737         // FIXME: this discards src value information.  This is
19738         // over-conservative. It would be beneficial to be able to remember
19739         // both potential memory locations.  Since we are discarding
19740         // src value info, don't do the transformation if the memory
19741         // locations are not in the default address space.
19742         LLD->getPointerInfo().getAddrSpace() != 0 ||
19743         RLD->getPointerInfo().getAddrSpace() != 0 ||
19744         // We can't produce a CMOV of a TargetFrameIndex since we won't
19745         // generate the address generation required.
19746         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19747         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19748         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19749                                       LLD->getBasePtr().getValueType()))
19750       return false;
19751
19752     // The loads must not depend on one another.
19753     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19754       return false;
19755
19756     // Check that the select condition doesn't reach either load.  If so,
19757     // folding this will induce a cycle into the DAG.  If not, this is safe to
19758     // xform, so create a select of the addresses.
19759
19760     SmallPtrSet<const SDNode *, 32> Visited;
19761     SmallVector<const SDNode *, 16> Worklist;
19762
19763     // Always fail if LLD and RLD are not independent. TheSelect is a
19764     // predecessor to all Nodes in question so we need not search past it.
19765
19766     Visited.insert(TheSelect);
19767     Worklist.push_back(LLD);
19768     Worklist.push_back(RLD);
19769
19770     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19771         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19772       return false;
19773
19774     SDValue Addr;
19775     if (TheSelect->getOpcode() == ISD::SELECT) {
19776       // We cannot do this optimization if any pair of {RLD, LLD} is a
19777       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19778       // Loads, we only need to check if CondNode is a successor to one of the
19779       // loads. We can further avoid this if there's no use of their chain
19780       // value.
19781       SDNode *CondNode = TheSelect->getOperand(0).getNode();
19782       Worklist.push_back(CondNode);
19783
19784       if ((LLD->hasAnyUseOfValue(1) &&
19785            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19786           (RLD->hasAnyUseOfValue(1) &&
19787            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19788         return false;
19789
19790       Addr = DAG.getSelect(SDLoc(TheSelect),
19791                            LLD->getBasePtr().getValueType(),
19792                            TheSelect->getOperand(0), LLD->getBasePtr(),
19793                            RLD->getBasePtr());
19794     } else {  // Otherwise SELECT_CC
19795       // We cannot do this optimization if any pair of {RLD, LLD} is a
19796       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19797       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19798       // one of the loads. We can further avoid this if there's no use of their
19799       // chain value.
19800
19801       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19802       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19803       Worklist.push_back(CondLHS);
19804       Worklist.push_back(CondRHS);
19805
19806       if ((LLD->hasAnyUseOfValue(1) &&
19807            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19808           (RLD->hasAnyUseOfValue(1) &&
19809            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19810         return false;
19811
19812       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19813                          LLD->getBasePtr().getValueType(),
19814                          TheSelect->getOperand(0),
19815                          TheSelect->getOperand(1),
19816                          LLD->getBasePtr(), RLD->getBasePtr(),
19817                          TheSelect->getOperand(4));
19818     }
19819
19820     SDValue Load;
19821     // It is safe to replace the two loads if they have different alignments,
19822     // but the new load must be the minimum (most restrictive) alignment of the
19823     // inputs.
19824     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19825     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19826     if (!RLD->isInvariant())
19827       MMOFlags &= ~MachineMemOperand::MOInvariant;
19828     if (!RLD->isDereferenceable())
19829       MMOFlags &= ~MachineMemOperand::MODereferenceable;
19830     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19831       // FIXME: Discards pointer and AA info.
19832       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19833                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19834                          MMOFlags);
19835     } else {
19836       // FIXME: Discards pointer and AA info.
19837       Load = DAG.getExtLoad(
19838           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19839                                                   : LLD->getExtensionType(),
19840           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19841           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19842     }
19843
19844     // Users of the select now use the result of the load.
19845     CombineTo(TheSelect, Load);
19846
19847     // Users of the old loads now use the new load's chain.  We know the
19848     // old-load value is dead now.
19849     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19850     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19851     return true;
19852   }
19853
19854   return false;
19855 }
19856
19857 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19858 /// bitwise 'and'.
19859 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19860                                             SDValue N1, SDValue N2, SDValue N3,
19861                                             ISD::CondCode CC) {
19862   // If this is a select where the false operand is zero and the compare is a
19863   // check of the sign bit, see if we can perform the "gzip trick":
19864   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19865   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
19866   EVT XType = N0.getValueType();
19867   EVT AType = N2.getValueType();
19868   if (!isNullConstant(N3) || !XType.bitsGE(AType))
19869     return SDValue();
19870
19871   // If the comparison is testing for a positive value, we have to invert
19872   // the sign bit mask, so only do that transform if the target has a bitwise
19873   // 'and not' instruction (the invert is free).
19874   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19875     // (X > -1) ? A : 0
19876     // (X >  0) ? X : 0 <-- This is canonical signed max.
19877     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19878       return SDValue();
19879   } else if (CC == ISD::SETLT) {
19880     // (X <  0) ? A : 0
19881     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
19882     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19883       return SDValue();
19884   } else {
19885     return SDValue();
19886   }
19887
19888   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19889   // constant.
19890   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19891   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19892   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19893     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19894     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19895     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19896     AddToWorklist(Shift.getNode());
19897
19898     if (XType.bitsGT(AType)) {
19899       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19900       AddToWorklist(Shift.getNode());
19901     }
19902
19903     if (CC == ISD::SETGT)
19904       Shift = DAG.getNOT(DL, Shift, AType);
19905
19906     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19907   }
19908
19909   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19910   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19911   AddToWorklist(Shift.getNode());
19912
19913   if (XType.bitsGT(AType)) {
19914     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19915     AddToWorklist(Shift.getNode());
19916   }
19917
19918   if (CC == ISD::SETGT)
19919     Shift = DAG.getNOT(DL, Shift, AType);
19920
19921   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19922 }
19923
19924 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
19925 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19926 /// in it. This may be a win when the constant is not otherwise available
19927 /// because it replaces two constant pool loads with one.
19928 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19929     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19930     ISD::CondCode CC) {
19931   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
19932     return SDValue();
19933
19934   // If we are before legalize types, we want the other legalization to happen
19935   // first (for example, to avoid messing with soft float).
19936   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19937   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19938   EVT VT = N2.getValueType();
19939   if (!TV || !FV || !TLI.isTypeLegal(VT))
19940     return SDValue();
19941
19942   // If a constant can be materialized without loads, this does not make sense.
19943   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19944       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19945       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19946     return SDValue();
19947
19948   // If both constants have multiple uses, then we won't need to do an extra
19949   // load. The values are likely around in registers for other users.
19950   if (!TV->hasOneUse() && !FV->hasOneUse())
19951     return SDValue();
19952
19953   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19954                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19955   Type *FPTy = Elts[0]->getType();
19956   const DataLayout &TD = DAG.getDataLayout();
19957
19958   // Create a ConstantArray of the two constants.
19959   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19960   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19961                                       TD.getPrefTypeAlignment(FPTy));
19962   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19963
19964   // Get offsets to the 0 and 1 elements of the array, so we can select between
19965   // them.
19966   SDValue Zero = DAG.getIntPtrConstant(0, DL);
19967   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19968   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19969   SDValue Cond =
19970       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19971   AddToWorklist(Cond.getNode());
19972   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19973   AddToWorklist(CstOffset.getNode());
19974   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19975   AddToWorklist(CPIdx.getNode());
19976   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19977                      MachinePointerInfo::getConstantPool(
19978                          DAG.getMachineFunction()), Alignment);
19979 }
19980
19981 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19982 /// where 'cond' is the comparison specified by CC.
19983 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19984                                       SDValue N2, SDValue N3, ISD::CondCode CC,
19985                                       bool NotExtCompare) {
19986   // (x ? y : y) -> y.
19987   if (N2 == N3) return N2;
19988
19989   EVT CmpOpVT = N0.getValueType();
19990   EVT CmpResVT = getSetCCResultType(CmpOpVT);
19991   EVT VT = N2.getValueType();
19992   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19993   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19994   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19995
19996   // Determine if the condition we're dealing with is constant.
19997   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19998     AddToWorklist(SCC.getNode());
19999     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
20000       // fold select_cc true, x, y -> x
20001       // fold select_cc false, x, y -> y
20002       return !(SCCC->isNullValue()) ? N2 : N3;
20003     }
20004   }
20005
20006   if (SDValue V =
20007           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
20008     return V;
20009
20010   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
20011     return V;
20012
20013   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
20014   // where y is has a single bit set.
20015   // A plaintext description would be, we can turn the SELECT_CC into an AND
20016   // when the condition can be materialized as an all-ones register.  Any
20017   // single bit-test can be materialized as an all-ones register with
20018   // shift-left and shift-right-arith.
20019   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
20020       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
20021     SDValue AndLHS = N0->getOperand(0);
20022     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
20023     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
20024       // Shift the tested bit over the sign bit.
20025       const APInt &AndMask = ConstAndRHS->getAPIntValue();
20026       SDValue ShlAmt =
20027         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
20028                         getShiftAmountTy(AndLHS.getValueType()));
20029       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
20030
20031       // Now arithmetic right shift it all the way over, so the result is either
20032       // all-ones, or zero.
20033       SDValue ShrAmt =
20034         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
20035                         getShiftAmountTy(Shl.getValueType()));
20036       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
20037
20038       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
20039     }
20040   }
20041
20042   // fold select C, 16, 0 -> shl C, 4
20043   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
20044   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
20045
20046   if ((Fold || Swap) &&
20047       TLI.getBooleanContents(CmpOpVT) ==
20048           TargetLowering::ZeroOrOneBooleanContent &&
20049       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
20050
20051     if (Swap) {
20052       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
20053       std::swap(N2C, N3C);
20054     }
20055
20056     // If the caller doesn't want us to simplify this into a zext of a compare,
20057     // don't do it.
20058     if (NotExtCompare && N2C->isOne())
20059       return SDValue();
20060
20061     SDValue Temp, SCC;
20062     // zext (setcc n0, n1)
20063     if (LegalTypes) {
20064       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
20065       if (VT.bitsLT(SCC.getValueType()))
20066         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
20067       else
20068         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20069     } else {
20070       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
20071       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20072     }
20073
20074     AddToWorklist(SCC.getNode());
20075     AddToWorklist(Temp.getNode());
20076
20077     if (N2C->isOne())
20078       return Temp;
20079
20080     // shl setcc result by log2 n2c
20081     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
20082                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
20083                                        SDLoc(Temp),
20084                                        getShiftAmountTy(Temp.getValueType())));
20085   }
20086
20087   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
20088   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
20089   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
20090   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
20091   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
20092   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
20093   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
20094   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
20095   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
20096     SDValue ValueOnZero = N2;
20097     SDValue Count = N3;
20098     // If the condition is NE instead of E, swap the operands.
20099     if (CC == ISD::SETNE)
20100       std::swap(ValueOnZero, Count);
20101     // Check if the value on zero is a constant equal to the bits in the type.
20102     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
20103       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
20104         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
20105         // legal, combine to just cttz.
20106         if ((Count.getOpcode() == ISD::CTTZ ||
20107              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20108             N0 == Count.getOperand(0) &&
20109             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
20110           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20111         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20112         // legal, combine to just ctlz.
20113         if ((Count.getOpcode() == ISD::CTLZ ||
20114              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20115             N0 == Count.getOperand(0) &&
20116             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
20117           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20118       }
20119     }
20120   }
20121
20122   return SDValue();
20123 }
20124
20125 /// This is a stub for TargetLowering::SimplifySetCC.
20126 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20127                                    ISD::CondCode Cond, const SDLoc &DL,
20128                                    bool foldBooleans) {
20129   TargetLowering::DAGCombinerInfo
20130     DagCombineInfo(DAG, Level, false, this);
20131   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20132 }
20133
20134 /// Given an ISD::SDIV node expressing a divide by constant, return
20135 /// a DAG expression to select that will generate the same value by multiplying
20136 /// by a magic number.
20137 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20138 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20139   // when optimising for minimum size, we don't want to expand a div to a mul
20140   // and a shift.
20141   if (DAG.getMachineFunction().getFunction().hasMinSize())
20142     return SDValue();
20143
20144   SmallVector<SDNode *, 8> Built;
20145   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20146     for (SDNode *N : Built)
20147       AddToWorklist(N);
20148     return S;
20149   }
20150
20151   return SDValue();
20152 }
20153
20154 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20155 /// DAG expression that will generate the same value by right shifting.
20156 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20157   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20158   if (!C)
20159     return SDValue();
20160
20161   // Avoid division by zero.
20162   if (C->isNullValue())
20163     return SDValue();
20164
20165   SmallVector<SDNode *, 8> Built;
20166   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20167     for (SDNode *N : Built)
20168       AddToWorklist(N);
20169     return S;
20170   }
20171
20172   return SDValue();
20173 }
20174
20175 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20176 /// expression that will generate the same value by multiplying by a magic
20177 /// number.
20178 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20179 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20180   // when optimising for minimum size, we don't want to expand a div to a mul
20181   // and a shift.
20182   if (DAG.getMachineFunction().getFunction().hasMinSize())
20183     return SDValue();
20184
20185   SmallVector<SDNode *, 8> Built;
20186   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20187     for (SDNode *N : Built)
20188       AddToWorklist(N);
20189     return S;
20190   }
20191
20192   return SDValue();
20193 }
20194
20195 /// Determines the LogBase2 value for a non-null input value using the
20196 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20197 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20198   EVT VT = V.getValueType();
20199   unsigned EltBits = VT.getScalarSizeInBits();
20200   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20201   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20202   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20203   return LogBase2;
20204 }
20205
20206 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20207 /// For the reciprocal, we need to find the zero of the function:
20208 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
20209 ///     =>
20210 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20211 ///     does not require additional intermediate precision]
20212 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
20213   if (Level >= AfterLegalizeDAG)
20214     return SDValue();
20215
20216   // TODO: Handle half and/or extended types?
20217   EVT VT = Op.getValueType();
20218   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20219     return SDValue();
20220
20221   // If estimates are explicitly disabled for this function, we're done.
20222   MachineFunction &MF = DAG.getMachineFunction();
20223   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20224   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20225     return SDValue();
20226
20227   // Estimates may be explicitly enabled for this type with a custom number of
20228   // refinement steps.
20229   int Iterations = TLI.getDivRefinementSteps(VT, MF);
20230   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20231     AddToWorklist(Est.getNode());
20232
20233     if (Iterations) {
20234       SDLoc DL(Op);
20235       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20236
20237       // Newton iterations: Est = Est + Est (1 - Arg * Est)
20238       for (int i = 0; i < Iterations; ++i) {
20239         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
20240         AddToWorklist(NewEst.getNode());
20241
20242         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
20243         AddToWorklist(NewEst.getNode());
20244
20245         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20246         AddToWorklist(NewEst.getNode());
20247
20248         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
20249         AddToWorklist(Est.getNode());
20250       }
20251     }
20252     return Est;
20253   }
20254
20255   return SDValue();
20256 }
20257
20258 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20259 /// For the reciprocal sqrt, we need to find the zero of the function:
20260 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20261 ///     =>
20262 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20263 /// As a result, we precompute A/2 prior to the iteration loop.
20264 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20265                                          unsigned Iterations,
20266                                          SDNodeFlags Flags, bool Reciprocal) {
20267   EVT VT = Arg.getValueType();
20268   SDLoc DL(Arg);
20269   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20270
20271   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
20272   // this entire sequence requires only one FP constant.
20273   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20274   AddToWorklist(HalfArg.getNode());
20275
20276   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20277   AddToWorklist(HalfArg.getNode());
20278
20279   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
20280   for (unsigned i = 0; i < Iterations; ++i) {
20281     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20282     AddToWorklist(NewEst.getNode());
20283
20284     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20285     AddToWorklist(NewEst.getNode());
20286
20287     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20288     AddToWorklist(NewEst.getNode());
20289
20290     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20291     AddToWorklist(Est.getNode());
20292   }
20293
20294   // If non-reciprocal square root is requested, multiply the result by Arg.
20295   if (!Reciprocal) {
20296     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20297     AddToWorklist(Est.getNode());
20298   }
20299
20300   return Est;
20301 }
20302
20303 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20304 /// For the reciprocal sqrt, we need to find the zero of the function:
20305 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20306 ///     =>
20307 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
20308 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
20309                                          unsigned Iterations,
20310                                          SDNodeFlags Flags, bool Reciprocal) {
20311   EVT VT = Arg.getValueType();
20312   SDLoc DL(Arg);
20313   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
20314   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
20315
20316   // This routine must enter the loop below to work correctly
20317   // when (Reciprocal == false).
20318   assert(Iterations > 0);
20319
20320   // Newton iterations for reciprocal square root:
20321   // E = (E * -0.5) * ((A * E) * E + -3.0)
20322   for (unsigned i = 0; i < Iterations; ++i) {
20323     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
20324     AddToWorklist(AE.getNode());
20325
20326     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
20327     AddToWorklist(AEE.getNode());
20328
20329     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
20330     AddToWorklist(RHS.getNode());
20331
20332     // When calculating a square root at the last iteration build:
20333     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
20334     // (notice a common subexpression)
20335     SDValue LHS;
20336     if (Reciprocal || (i + 1) < Iterations) {
20337       // RSQRT: LHS = (E * -0.5)
20338       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
20339     } else {
20340       // SQRT: LHS = (A * E) * -0.5
20341       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
20342     }
20343     AddToWorklist(LHS.getNode());
20344
20345     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
20346     AddToWorklist(Est.getNode());
20347   }
20348
20349   return Est;
20350 }
20351
20352 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
20353 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
20354 /// Op can be zero.
20355 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
20356                                            bool Reciprocal) {
20357   if (Level >= AfterLegalizeDAG)
20358     return SDValue();
20359
20360   // TODO: Handle half and/or extended types?
20361   EVT VT = Op.getValueType();
20362   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20363     return SDValue();
20364
20365   // If estimates are explicitly disabled for this function, we're done.
20366   MachineFunction &MF = DAG.getMachineFunction();
20367   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
20368   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20369     return SDValue();
20370
20371   // Estimates may be explicitly enabled for this type with a custom number of
20372   // refinement steps.
20373   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
20374
20375   bool UseOneConstNR = false;
20376   if (SDValue Est =
20377       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
20378                           Reciprocal)) {
20379     AddToWorklist(Est.getNode());
20380
20381     if (Iterations) {
20382       Est = UseOneConstNR
20383             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
20384             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
20385
20386       if (!Reciprocal) {
20387         // The estimate is now completely wrong if the input was exactly 0.0 or
20388         // possibly a denormal. Force the answer to 0.0 for those cases.
20389         SDLoc DL(Op);
20390         EVT CCVT = getSetCCResultType(VT);
20391         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
20392         const Function &F = DAG.getMachineFunction().getFunction();
20393         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
20394         if (Denorms.getValueAsString().equals("ieee")) {
20395           // fabs(X) < SmallestNormal ? 0.0 : Est
20396           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
20397           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
20398           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
20399           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20400           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
20401           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
20402           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
20403           AddToWorklist(Fabs.getNode());
20404           AddToWorklist(IsDenorm.getNode());
20405           AddToWorklist(Est.getNode());
20406         } else {
20407           // X == 0.0 ? 0.0 : Est
20408           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20409           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
20410           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
20411           AddToWorklist(IsZero.getNode());
20412           AddToWorklist(Est.getNode());
20413         }
20414       }
20415     }
20416     return Est;
20417   }
20418
20419   return SDValue();
20420 }
20421
20422 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20423   return buildSqrtEstimateImpl(Op, Flags, true);
20424 }
20425
20426 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20427   return buildSqrtEstimateImpl(Op, Flags, false);
20428 }
20429
20430 /// Return true if there is any possibility that the two addresses overlap.
20431 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
20432
20433   struct MemUseCharacteristics {
20434     bool IsVolatile;
20435     SDValue BasePtr;
20436     int64_t Offset;
20437     Optional<int64_t> NumBytes;
20438     MachineMemOperand *MMO;
20439   };
20440
20441   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
20442     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
20443       int64_t Offset = 0;
20444       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
20445         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
20446                      ? C->getSExtValue()
20447                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
20448                            ? -1 * C->getSExtValue()
20449                            : 0;
20450       return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
20451               Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
20452               LSN->getMemOperand()};
20453     }
20454     if (const auto *LN = cast<LifetimeSDNode>(N))
20455       return {false /*isVolatile*/, LN->getOperand(1),
20456               (LN->hasOffset()) ? LN->getOffset() : 0,
20457               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
20458                                 : Optional<int64_t>(),
20459               (MachineMemOperand *)nullptr};
20460     // Default.
20461     return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
20462             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
20463   };
20464
20465   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
20466                         MUC1 = getCharacteristics(Op1);
20467
20468   // If they are to the same address, then they must be aliases.
20469   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
20470       MUC0.Offset == MUC1.Offset)
20471     return true;
20472
20473   // If they are both volatile then they cannot be reordered.
20474   if (MUC0.IsVolatile && MUC1.IsVolatile)
20475     return true;
20476
20477   if (MUC0.MMO && MUC1.MMO) {
20478     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20479         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20480       return false;
20481   }
20482
20483   // Try to prove that there is aliasing, or that there is no aliasing. Either
20484   // way, we can return now. If nothing can be proved, proceed with more tests.
20485   bool IsAlias;
20486   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
20487                                        DAG, IsAlias))
20488     return IsAlias;
20489
20490   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
20491   // either are not known.
20492   if (!MUC0.MMO || !MUC1.MMO)
20493     return true;
20494
20495   // If one operation reads from invariant memory, and the other may store, they
20496   // cannot alias. These should really be checking the equivalent of mayWrite,
20497   // but it only matters for memory nodes other than load /store.
20498   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20499       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20500     return false;
20501
20502   // If we know required SrcValue1 and SrcValue2 have relatively large
20503   // alignment compared to the size and offset of the access, we may be able
20504   // to prove they do not alias. This check is conservative for now to catch
20505   // cases created by splitting vector types.
20506   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
20507   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
20508   unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
20509   unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
20510   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
20511       MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
20512       *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
20513     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
20514     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
20515
20516     // There is no overlap between these relatively aligned accesses of
20517     // similar size. Return no alias.
20518     if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
20519         (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
20520       return false;
20521   }
20522
20523   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
20524                    ? CombinerGlobalAA
20525                    : DAG.getSubtarget().useAA();
20526 #ifndef NDEBUG
20527   if (CombinerAAOnlyFunc.getNumOccurrences() &&
20528       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
20529     UseAA = false;
20530 #endif
20531
20532   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
20533     // Use alias analysis information.
20534     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
20535     int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
20536     int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
20537     AliasResult AAResult = AA->alias(
20538         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
20539                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
20540         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
20541                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
20542     if (AAResult == NoAlias)
20543       return false;
20544   }
20545
20546   // Otherwise we have to assume they alias.
20547   return true;
20548 }
20549
20550 /// Walk up chain skipping non-aliasing memory nodes,
20551 /// looking for aliasing nodes and adding them to the Aliases vector.
20552 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
20553                                    SmallVectorImpl<SDValue> &Aliases) {
20554   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
20555   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
20556
20557   // Get alias information for node.
20558   const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
20559
20560   // Starting off.
20561   Chains.push_back(OriginalChain);
20562   unsigned Depth = 0;
20563
20564   // Attempt to improve chain by a single step
20565   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
20566     switch (C.getOpcode()) {
20567     case ISD::EntryToken:
20568       // No need to mark EntryToken.
20569       C = SDValue();
20570       return true;
20571     case ISD::LOAD:
20572     case ISD::STORE: {
20573       // Get alias information for C.
20574       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
20575                       !cast<LSBaseSDNode>(C.getNode())->isVolatile();
20576       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
20577         // Look further up the chain.
20578         C = C.getOperand(0);
20579         return true;
20580       }
20581       // Alias, so stop here.
20582       return false;
20583     }
20584
20585     case ISD::CopyFromReg:
20586       // Always forward past past CopyFromReg.
20587       C = C.getOperand(0);
20588       return true;
20589
20590     case ISD::LIFETIME_START:
20591     case ISD::LIFETIME_END: {
20592       // We can forward past any lifetime start/end that can be proven not to
20593       // alias the memory access.
20594       if (!isAlias(N, C.getNode())) {
20595         // Look further up the chain.
20596         C = C.getOperand(0);
20597         return true;
20598       }
20599       return false;
20600     }
20601     default:
20602       return false;
20603     }
20604   };
20605
20606   // Look at each chain and determine if it is an alias.  If so, add it to the
20607   // aliases list.  If not, then continue up the chain looking for the next
20608   // candidate.
20609   while (!Chains.empty()) {
20610     SDValue Chain = Chains.pop_back_val();
20611
20612     // Don't bother if we've seen Chain before.
20613     if (!Visited.insert(Chain.getNode()).second)
20614       continue;
20615
20616     // For TokenFactor nodes, look at each operand and only continue up the
20617     // chain until we reach the depth limit.
20618     //
20619     // FIXME: The depth check could be made to return the last non-aliasing
20620     // chain we found before we hit a tokenfactor rather than the original
20621     // chain.
20622     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
20623       Aliases.clear();
20624       Aliases.push_back(OriginalChain);
20625       return;
20626     }
20627
20628     if (Chain.getOpcode() == ISD::TokenFactor) {
20629       // We have to check each of the operands of the token factor for "small"
20630       // token factors, so we queue them up.  Adding the operands to the queue
20631       // (stack) in reverse order maintains the original order and increases the
20632       // likelihood that getNode will find a matching token factor (CSE.)
20633       if (Chain.getNumOperands() > 16) {
20634         Aliases.push_back(Chain);
20635         continue;
20636       }
20637       for (unsigned n = Chain.getNumOperands(); n;)
20638         Chains.push_back(Chain.getOperand(--n));
20639       ++Depth;
20640       continue;
20641     }
20642     // Everything else
20643     if (ImproveChain(Chain)) {
20644       // Updated Chain Found, Consider new chain if one exists.
20645       if (Chain.getNode())
20646         Chains.push_back(Chain);
20647       ++Depth;
20648       continue;
20649     }
20650     // No Improved Chain Possible, treat as Alias.
20651     Aliases.push_back(Chain);
20652   }
20653 }
20654
20655 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20656 /// (aliasing node.)
20657 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20658   if (OptLevel == CodeGenOpt::None)
20659     return OldChain;
20660
20661   // Ops for replacing token factor.
20662   SmallVector<SDValue, 8> Aliases;
20663
20664   // Accumulate all the aliases to this node.
20665   GatherAllAliases(N, OldChain, Aliases);
20666
20667   // If no operands then chain to entry token.
20668   if (Aliases.size() == 0)
20669     return DAG.getEntryNode();
20670
20671   // If a single operand then chain to it.  We don't need to revisit it.
20672   if (Aliases.size() == 1)
20673     return Aliases[0];
20674
20675   // Construct a custom tailored token factor.
20676   return DAG.getTokenFactor(SDLoc(N), Aliases);
20677 }
20678
namespace {
// Empty placeholder value type (with a single instance, Unit) for maps that
// only need to record keys. All UnitT values compare equal.
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
20685
20686 // This function tries to collect a bunch of potentially interesting
20687 // nodes to improve the chains of, all at once. This might seem
20688 // redundant, as this function gets called when visiting every store
20689 // node, so why not let the work be done on each store as it's visited?
20690 //
20691 // I believe this is mainly important because MergeConsecutiveStores
20692 // is unable to deal with merging stores of different sizes, so unless
20693 // we improve the chains of all the potential candidates up-front
20694 // before running MergeConsecutiveStores, it might only see some of
20695 // the nodes that will eventually be candidates, and then not be able
20696 // to go from a partially-merged state to the desired final
20697 // fully-merged state.
20698
/// Starting at \p St, walk up the chain through directly chained stores that
/// share St's base/index and write to disjoint byte ranges, then give each of
/// them (and St) an improved chain and join the results with a TokenFactor
/// that replaces St.
///
/// Returns false (without changing the DAG) when St has no usable base
/// pointer or when no chained store qualifies; returns true after rewriting.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  // Topmost store accepted so far while walking up the chain.
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately previous address
  // space and thus merged with the previous interval at insertion time.

  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Add ST's interval: [0, size of St's memory type rounded up to bytes).
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  // Climb the chain for as long as its operand is itself a store that
  // qualifies.
  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    // Volatile and indexed stores end the walk.
    if (Chain->isVolatile() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    // Offset receives this store's byte distance from BasePtr.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    // Size of this store's memory type, rounded up to whole bytes.
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    // NOTE(review): as written this stops the walk when the previous interval
    // ends at or before Offset, i.e. exactly when this store does start after
    // it. That looks inverted or at least overly conservative relative to the
    // comment above; confirm against IntervalMap::find semantics before
    // changing.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  // Iterate from the last collected store (the one furthest up the chain)
  // back to the first.
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    // Swap in the better chain operand; the other three operands are reused
    // unchanged.
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  // Replace St with a token factor over all the re-chained stores.
  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  AddToWorklist(STChain);
  // Add TF operands worklist in reverse order.
  for (auto I = TF->getNumOperands(); I;)
    AddToWorklist(TF->getOperand(--I).getNode());
  AddToWorklist(TF.getNode());
  return true;
}
20805
20806 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
20807   if (OptLevel == CodeGenOpt::None)
20808     return false;
20809
20810   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20811
20812   // We must have a base and an offset.
20813   if (!BasePtr.getBase().getNode())
20814     return false;
20815
20816   // Do not handle stores to undef base pointers.
20817   if (BasePtr.getBase().isUndef())
20818     return false;
20819
20820   // Directly improve a chain of disjoint stores starting at St.
20821   if (parallelizeChainedStores(St))
20822     return true;
20823
20824   // Improve St's Chain..
20825   SDValue BetterChain = FindBetterChain(St, St->getChain());
20826   if (St->getChain() != BetterChain) {
20827     replaceStoreChain(St, BetterChain);
20828     return true;
20829   }
20830   return false;
20831 }
20832
20833 /// This is the entry point for the file.
20834 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
20835                            CodeGenOpt::Level OptLevel) {
20836   /// This is the main entry point to this class.
20837   DAGCombiner(*this, AA, OptLevel).Run(Level);
20838 }